1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 #include <petscdevice.h> 12 #include <petsc/private/deviceimpl.h> 13 14 /*@ 15 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 16 matrix that represents the operator. For sequential matrices it returns itself. 17 18 Input Parameter: 19 . A - the sequential or MPI `MATDENSE` matrix 20 21 Output Parameter: 22 . B - the inner matrix 23 24 Level: intermediate 25 26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 27 @*/ 28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 29 { 30 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 31 PetscBool flg; 32 33 PetscFunctionBegin; 34 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 35 PetscAssertPointer(B, 2); 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 37 if (flg) *B = mat->A; 38 else { 39 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 40 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 41 *B = A; 42 } 43 PetscFunctionReturn(PETSC_SUCCESS); 44 } 45 46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 47 { 48 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 49 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 50 51 PetscFunctionBegin; 52 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 53 PetscFunctionReturn(PETSC_SUCCESS); 54 } 55 56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 57 { 58 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 59 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 60 PetscScalar *v; 61 62 PetscFunctionBegin; 63 PetscCall(MatDenseGetArray(mat->A, &v)); 64 PetscCall(MatDenseGetLDA(mat->A, &lda)); 65 rend2 = PetscMin(rend, A->cmap->N); 66 if (rend2 > rstart) { 67 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 68 PetscCall(PetscLogFlops(rend2 - rstart)); 69 } 70 PetscCall(MatDenseRestoreArray(mat->A, &v)); 71 PetscFunctionReturn(PETSC_SUCCESS); 72 } 73 74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 75 { 76 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 77 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 78 79 PetscFunctionBegin; 80 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 81 lrow = row - rstart; 82 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 83 PetscFunctionReturn(PETSC_SUCCESS); 84 } 85 86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 87 { 88 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 89 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 90 91 PetscFunctionBegin; 92 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 93 lrow = row - rstart; 94 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 99 { 100 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 101 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 102 PetscScalar *array; 103 MPI_Comm comm; 104 PetscBool flg; 105 Mat B; 106 107 PetscFunctionBegin; 108 PetscCall(MatHasCongruentLayouts(A, &flg)); 109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 110 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 111 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 112 #if PetscDefined(HAVE_CUDA) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 115 #elif PetscDefined(HAVE_HIP) 116 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 117 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 118 #endif 119 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 120 PetscCall(MatCreate(comm, &B)); 121 PetscCall(MatSetSizes(B, m, m, m, m)); 122 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 123 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 125 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 126 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 127 *a = B; 128 PetscCall(MatDestroy(&B)); 129 } else *a = B; 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 134 { 135 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 136 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 137 PetscBool roworiented = A->roworiented; 138 139 PetscFunctionBegin; 140 for (i = 0; i < m; i++) { 141 if (idxm[i] < 0) continue; 142 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 if (roworiented) { 146 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 147 } else { 148 for (j = 0; j < n; j++) { 149 if (idxn[j] < 0) continue; 150 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 151 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 152 } 153 } 154 } else if (!A->donotstash) { 155 mat->assembled = PETSC_FALSE; 156 if (roworiented) { 157 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 158 } else { 159 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 160 } 161 } 162 } 163 PetscFunctionReturn(PETSC_SUCCESS); 164 } 165 166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 167 { 168 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 169 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 170 171 PetscFunctionBegin; 172 for (i = 0; i < m; i++) { 173 if (idxm[i] < 0) continue; /* negative row */ 174 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 175 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 176 row = idxm[i] - rstart; 177 for (j = 0; j < n; j++) { 178 if (idxn[j] < 0) continue; /* negative column */ 179 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 180 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 181 } 182 } 183 PetscFunctionReturn(PETSC_SUCCESS); 184 } 185 186 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 189 190 PetscFunctionBegin; 191 PetscCall(MatDenseGetLDA(a->A, lda)); 192 PetscFunctionReturn(PETSC_SUCCESS); 193 } 194 195 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 196 { 197 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 198 MatType mtype = MATSEQDENSE; 199 200 PetscFunctionBegin; 201 if (!a->A) { 202 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 203 PetscCall(PetscLayoutSetUp(A->rmap)); 204 PetscCall(PetscLayoutSetUp(A->cmap)); 205 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 206 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 207 #if PetscDefined(HAVE_CUDA) 208 PetscBool iscuda; 209 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 210 if (iscuda) mtype = MATSEQDENSECUDA; 211 #elif PetscDefined(HAVE_HIP) 212 PetscBool iship; 213 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 214 if (iship) mtype = MATSEQDENSEHIP; 215 #endif 216 PetscCall(MatSetType(a->A, mtype)); 217 } 218 PetscCall(MatDenseSetLDA(a->A, lda)); 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 222 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 223 { 224 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 225 226 PetscFunctionBegin; 227 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 228 PetscCall(MatDenseGetArray(a->A, array)); 229 PetscFunctionReturn(PETSC_SUCCESS); 230 } 231 232 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 233 { 234 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 235 236 PetscFunctionBegin; 237 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 238 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 243 { 244 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 245 246 PetscFunctionBegin; 247 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 248 PetscCall(MatDenseGetArrayWrite(a->A, array)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 253 { 254 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 255 256 PetscFunctionBegin; 257 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 258 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 259 PetscCall(MatDensePlaceArray(a->A, array)); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 264 { 265 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 266 267 PetscFunctionBegin; 268 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 269 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 270 PetscCall(MatDenseResetArray(a->A)); 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 275 { 276 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 277 278 PetscFunctionBegin; 279 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 280 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 281 PetscCall(MatDenseReplaceArray(a->A, array)); 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 286 { 287 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 288 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 289 const PetscInt *irow, *icol; 290 const PetscScalar *v; 291 PetscScalar *bv; 292 Mat newmat; 293 IS iscol_local; 294 MPI_Comm comm_is, comm_mat; 295 296 PetscFunctionBegin; 297 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 298 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 299 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 300 301 PetscCall(ISAllGather(iscol, &iscol_local)); 302 PetscCall(ISGetIndices(isrow, &irow)); 303 PetscCall(ISGetIndices(iscol_local, &icol)); 304 PetscCall(ISGetLocalSize(isrow, &nrows)); 305 PetscCall(ISGetLocalSize(iscol, &ncols)); 306 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 307 308 /* No parallel redistribution currently supported! Should really check each index set 309 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 310 original matrix! */ 311 312 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 313 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 314 315 /* Check submatrix call */ 316 if (scall == MAT_REUSE_MATRIX) { 317 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 318 /* Really need to test rows and column sizes! */ 319 newmat = *B; 320 } else { 321 /* Create and fill new matrix */ 322 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 323 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 324 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 325 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 326 } 327 328 /* Now extract the data pointers and do the copy, column at a time */ 329 newmatd = (Mat_MPIDense *)newmat->data; 330 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 331 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 332 PetscCall(MatDenseGetLDA(mat->A, &lda)); 333 for (i = 0; i < Ncols; i++) { 334 const PetscScalar *av = v + lda * icol[i]; 335 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 336 } 337 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 338 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 339 340 /* Assemble the matrices so that the correct flags are set */ 341 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 342 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 343 344 /* Free work space */ 345 PetscCall(ISRestoreIndices(isrow, &irow)); 346 PetscCall(ISRestoreIndices(iscol_local, &icol)); 347 PetscCall(ISDestroy(&iscol_local)); 348 *B = newmat; 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 353 { 354 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 355 356 PetscFunctionBegin; 357 PetscCall(MatDenseRestoreArray(a->A, array)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 361 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 362 { 363 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 364 365 PetscFunctionBegin; 366 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 367 PetscFunctionReturn(PETSC_SUCCESS); 368 } 369 370 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 371 { 372 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 373 374 PetscFunctionBegin; 375 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 380 { 381 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 382 PetscInt nstash, reallocs; 383 384 PetscFunctionBegin; 385 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 386 387 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 388 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 389 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 394 { 395 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 396 PetscInt i, *row, *col, flg, j, rstart, ncols; 397 PetscMPIInt n; 398 PetscScalar *val; 399 400 PetscFunctionBegin; 401 if (!mdn->donotstash && !mat->nooffprocentries) { 402 /* wait on receives */ 403 while (1) { 404 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 405 if (!flg) break; 406 407 for (i = 0; i < n;) { 408 /* Now identify the consecutive vals belonging to the same row */ 409 for (j = i, rstart = row[j]; j < n; j++) { 410 if (row[j] != rstart) break; 411 } 412 if (j < n) ncols = j - i; 413 else ncols = n - i; 414 /* Now assemble all these values with a single function call */ 415 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 416 i = j; 417 } 418 } 419 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 420 } 421 422 PetscCall(MatAssemblyBegin(mdn->A, mode)); 423 PetscCall(MatAssemblyEnd(mdn->A, mode)); 424 PetscFunctionReturn(PETSC_SUCCESS); 425 } 426 427 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 428 { 429 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 430 431 PetscFunctionBegin; 432 PetscCall(MatZeroEntries(l->A)); 433 PetscFunctionReturn(PETSC_SUCCESS); 434 } 435 436 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 437 { 438 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 439 PetscInt i, len, *lrows; 440 441 PetscFunctionBegin; 442 /* get locally owned rows */ 443 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 444 /* fix right-hand side if needed */ 445 if (x && b) { 446 const PetscScalar *xx; 447 PetscScalar *bb; 448 449 PetscCall(VecGetArrayRead(x, &xx)); 450 PetscCall(VecGetArrayWrite(b, &bb)); 451 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 452 PetscCall(VecRestoreArrayRead(x, &xx)); 453 PetscCall(VecRestoreArrayWrite(b, &bb)); 454 } 455 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 456 if (diag != 0.0) { 457 Vec d; 458 459 PetscCall(MatCreateVecs(A, NULL, &d)); 460 PetscCall(VecSet(d, diag)); 461 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 462 PetscCall(VecDestroy(&d)); 463 } 464 PetscCall(PetscFree(lrows)); 465 PetscFunctionReturn(PETSC_SUCCESS); 466 } 467 468 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 471 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 472 473 static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 474 { 475 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype, aymtype; 479 480 PetscFunctionBegin; 481 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 482 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 483 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype)); 484 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 485 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 486 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay)); 487 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 488 PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end)); 489 PetscFunctionReturn(PETSC_SUCCESS); 490 } 491 492 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 493 { 494 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype, aymtype; 498 499 PetscFunctionBegin; 500 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 501 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 502 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype)); 503 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 504 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 505 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay)); 506 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 507 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 508 PetscFunctionReturn(PETSC_SUCCESS); 509 } 510 511 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 512 { 513 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype, aymtype; 517 518 PetscFunctionBegin; 519 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 520 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 521 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 522 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 523 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 524 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 525 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 526 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 539 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 540 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 541 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 542 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 543 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 544 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 545 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 const PetscScalar *ax; 553 PetscScalar *ay; 554 PetscMemType axmtype, aymtype; 555 PetscInt r_start, r_end; 556 PetscInt c_start_local, c_end_local; 557 558 PetscFunctionBegin; 559 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 560 PetscCall(VecZeroEntries(a->lvec)); 561 PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end)); 562 c_start_local = PetscMax(c_start, r_start); 563 c_end_local = PetscMin(c_end, r_end); 564 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 565 if (c_end_local > c_start_local) { 566 if (PetscMemTypeHost(aymtype)) { 567 PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local))); 568 } else { 569 PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start)))); 570 PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local))); 571 } 572 } 573 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 574 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 575 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 576 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 577 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 578 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 579 PetscFunctionReturn(PETSC_SUCCESS); 580 } 581 582 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 583 { 584 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 585 const PetscScalar *ax; 586 PetscScalar *ay; 587 PetscMemType axmtype, aymtype; 588 589 PetscFunctionBegin; 590 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 591 PetscCall(VecSet(yy, 0.0)); 592 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 593 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 594 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 595 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 596 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 597 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 598 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 599 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 604 { 605 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 606 const PetscScalar *ax; 607 PetscScalar *ay; 608 PetscMemType axmtype, aymtype; 609 610 PetscFunctionBegin; 611 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 612 PetscCall(VecCopy(yy, zz)); 613 PetscCall(VecZeroEntries(a->lvec)); 614 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 615 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 616 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 617 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 618 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 619 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 620 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 625 { 626 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 627 const PetscScalar *ax; 628 PetscScalar *ay; 629 PetscMemType axmtype, aymtype; 630 631 PetscFunctionBegin; 632 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 633 PetscCall(VecCopy(yy, zz)); 634 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 635 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 636 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 637 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 638 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 639 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 640 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 641 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 642 PetscFunctionReturn(PETSC_SUCCESS); 643 } 644 645 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 646 { 647 PetscFunctionBegin; 648 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 653 { 654 PetscFunctionBegin; 655 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 656 PetscFunctionReturn(PETSC_SUCCESS); 657 } 658 659 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 660 { 661 PetscFunctionBegin; 662 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 663 PetscFunctionReturn(PETSC_SUCCESS); 664 } 665 666 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 667 { 668 PetscFunctionBegin; 669 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 670 PetscFunctionReturn(PETSC_SUCCESS); 671 } 672 673 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 674 { 675 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 676 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 677 PetscScalar *x; 678 const PetscScalar *av; 679 680 PetscFunctionBegin; 681 PetscCall(VecGetArray(v, &x)); 682 PetscCall(VecGetSize(v, &ng)); 683 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 684 PetscCall(VecGetLocalSize(v, &nl)); 685 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 686 radd = A->rmap->rstart * m; 687 PetscCall(MatDenseGetArrayRead(a->A, &av)); 688 PetscCall(MatDenseGetLDA(a->A, &lda)); 689 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 690 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 691 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 692 PetscCall(VecRestoreArray(v, &x)); 693 PetscFunctionReturn(PETSC_SUCCESS); 694 } 695 696 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 697 { 698 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 699 700 PetscFunctionBegin; 701 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 702 PetscCall(MatStashDestroy_Private(&mat->stash)); 703 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 704 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 705 PetscCall(MatDestroy(&mdn->A)); 706 PetscCall(VecDestroy(&mdn->lvec)); 707 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 708 PetscCall(VecDestroy(&mdn->cvec)); 709 PetscCall(MatDestroy(&mdn->cmat)); 710 711 PetscCall(PetscFree(mat->data)); 712 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 713 714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 723 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 727 #if defined(PETSC_HAVE_ELEMENTAL) 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 729 #endif 730 #if defined(PETSC_HAVE_SCALAPACK) 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 732 #endif 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 736 #if defined(PETSC_HAVE_CUDA) 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 755 #endif 756 #if defined(PETSC_HAVE_HIP) 757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 775 #endif 776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL)); 787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 790 791 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 792 PetscFunctionReturn(PETSC_SUCCESS); 793 } 794 795 #include <petscdraw.h> 796 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 797 { 798 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 799 PetscMPIInt rank; 800 PetscViewerType vtype; 801 PetscBool isascii, isdraw; 802 PetscViewer sviewer; 803 PetscViewerFormat format; 804 805 PetscFunctionBegin; 806 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 807 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 808 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 809 if (isascii) { 810 PetscCall(PetscViewerGetType(viewer, &vtype)); 811 PetscCall(PetscViewerGetFormat(viewer, &format)); 812 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 813 MatInfo info; 814 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 815 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 816 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 817 (PetscInt)info.memory)); 818 PetscCall(PetscViewerFlush(viewer)); 819 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 820 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } else if (format == PETSC_VIEWER_ASCII_INFO) { 823 PetscFunctionReturn(PETSC_SUCCESS); 824 } 825 } else if (isdraw) { 826 PetscDraw draw; 827 PetscBool isnull; 828 829 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 830 PetscCall(PetscDrawIsNull(draw, &isnull)); 831 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 832 } 833 834 { 835 /* assemble the entire matrix onto first processor. */ 836 Mat A; 837 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 838 PetscInt *cols; 839 PetscScalar *vals; 840 841 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 842 if (rank == 0) { 843 PetscCall(MatSetSizes(A, M, N, M, N)); 844 } else { 845 PetscCall(MatSetSizes(A, 0, 0, M, N)); 846 } 847 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 848 PetscCall(MatSetType(A, MATMPIDENSE)); 849 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 850 851 /* Copy the matrix ... This isn't the most efficient means, 852 but it's quick for now */ 853 A->insertmode = INSERT_VALUES; 854 855 row = mat->rmap->rstart; 856 m = mdn->A->rmap->n; 857 for (i = 0; i < m; i++) { 858 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 859 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 860 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 861 row++; 862 } 863 864 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 865 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 866 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 867 if (rank == 0) { 868 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 869 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 870 } 871 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 872 PetscCall(MatDestroy(&A)); 873 } 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 878 { 879 PetscBool isascii, isbinary, isdraw, issocket; 880 881 PetscFunctionBegin; 882 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 883 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 884 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 885 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 886 887 if (isascii || issocket || isdraw) { 888 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 889 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 890 PetscFunctionReturn(PETSC_SUCCESS); 891 } 892 893 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 894 { 895 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 896 Mat mdn = mat->A; 897 PetscLogDouble isend[5], irecv[5]; 898 899 PetscFunctionBegin; 900 info->block_size = 1.0; 901 902 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 903 904 isend[0] = info->nz_used; 905 isend[1] = info->nz_allocated; 906 isend[2] = info->nz_unneeded; 907 isend[3] = info->memory; 908 isend[4] = info->mallocs; 909 if (flag == MAT_LOCAL) { 910 info->nz_used = isend[0]; 911 info->nz_allocated = isend[1]; 912 info->nz_unneeded = isend[2]; 913 info->memory = isend[3]; 914 info->mallocs = isend[4]; 915 } else if (flag == MAT_GLOBAL_MAX) { 916 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 917 918 info->nz_used = irecv[0]; 919 info->nz_allocated = irecv[1]; 920 info->nz_unneeded = irecv[2]; 921 info->memory = irecv[3]; 922 info->mallocs = irecv[4]; 923 } else if (flag == MAT_GLOBAL_SUM) { 924 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 925 926 info->nz_used = irecv[0]; 927 info->nz_allocated = irecv[1]; 928 info->nz_unneeded = irecv[2]; 929 info->memory = irecv[3]; 930 info->mallocs = irecv[4]; 931 } 932 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 933 info->fill_ratio_needed = 0; 934 info->factor_mallocs = 0; 935 PetscFunctionReturn(PETSC_SUCCESS); 936 } 937 938 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 939 { 940 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 941 942 PetscFunctionBegin; 943 switch (op) { 944 case MAT_NEW_NONZERO_LOCATIONS: 945 case MAT_NEW_NONZERO_LOCATION_ERR: 946 case MAT_NEW_NONZERO_ALLOCATION_ERR: 947 MatCheckPreallocated(A, 1); 948 PetscCall(MatSetOption(a->A, op, flg)); 949 break; 950 case MAT_ROW_ORIENTED: 951 MatCheckPreallocated(A, 1); 952 a->roworiented = flg; 953 PetscCall(MatSetOption(a->A, op, flg)); 954 break; 955 case MAT_IGNORE_OFF_PROC_ENTRIES: 956 a->donotstash = flg; 957 break; 958 case MAT_SYMMETRIC: 959 case MAT_STRUCTURALLY_SYMMETRIC: 960 case MAT_HERMITIAN: 961 case MAT_SYMMETRY_ETERNAL: 962 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 963 case MAT_SPD: 964 case MAT_SPD_ETERNAL: 965 /* if the diagonal matrix is square it inherits some of the properties above */ 966 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 967 break; 968 default: 969 break; 970 } 971 PetscFunctionReturn(PETSC_SUCCESS); 972 } 973 974 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 975 { 976 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 977 const PetscScalar *l; 978 PetscScalar x, *v, *vv, *r; 979 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 980 981 PetscFunctionBegin; 982 PetscCall(MatDenseGetArray(mdn->A, &vv)); 983 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 984 PetscCall(MatGetLocalSize(A, &s2, &s3)); 985 if (ll) { 986 PetscCall(VecGetLocalSize(ll, &s2a)); 987 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 988 PetscCall(VecGetArrayRead(ll, &l)); 989 for (i = 0; i < m; i++) { 990 x = l[i]; 991 v = vv + i; 992 for (j = 0; j < n; j++) { 993 (*v) *= x; 994 v += lda; 995 } 996 } 997 PetscCall(VecRestoreArrayRead(ll, &l)); 998 PetscCall(PetscLogFlops(1.0 * n * m)); 999 } 1000 if (rr) { 1001 const PetscScalar *ar; 1002 1003 PetscCall(VecGetLocalSize(rr, &s3a)); 1004 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 1005 PetscCall(VecGetArrayRead(rr, &ar)); 1006 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 1007 PetscCall(VecGetArray(mdn->lvec, &r)); 1008 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1009 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1010 PetscCall(VecRestoreArrayRead(rr, &ar)); 1011 for (i = 0; i < n; i++) { 1012 x = r[i]; 1013 v = vv + i * lda; 1014 for (j = 0; j < m; j++) (*v++) *= x; 1015 } 1016 PetscCall(VecRestoreArray(mdn->lvec, &r)); 1017 PetscCall(PetscLogFlops(1.0 * n * m)); 1018 } 1019 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 1020 PetscFunctionReturn(PETSC_SUCCESS); 1021 } 1022 1023 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 1024 { 1025 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1026 PetscInt i, j; 1027 PetscMPIInt size; 1028 PetscReal sum = 0.0; 1029 const PetscScalar *av, *v; 1030 1031 PetscFunctionBegin; 1032 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1033 v = av; 1034 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1035 if (size == 1) { 1036 PetscCall(MatNorm(mdn->A, type, nrm)); 1037 } else { 1038 if (type == NORM_FROBENIUS) { 1039 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1040 sum += PetscRealPart(PetscConj(*v) * (*v)); 1041 v++; 1042 } 1043 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1044 *nrm = PetscSqrtReal(*nrm); 1045 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1046 } else if (type == NORM_1) { 1047 PetscReal *tmp; 1048 1049 PetscCall(PetscCalloc1(A->cmap->N, &tmp)); 1050 *nrm = 0.0; 1051 v = av; 1052 for (j = 0; j < mdn->A->cmap->n; j++) { 1053 for (i = 0; i < mdn->A->rmap->n; i++) { 1054 tmp[j] += PetscAbsScalar(*v); 1055 v++; 1056 } 1057 } 1058 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1059 for (j = 0; j < A->cmap->N; j++) { 1060 if (tmp[j] > *nrm) *nrm = tmp[j]; 1061 } 1062 PetscCall(PetscFree(tmp)); 1063 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1064 } else if (type == NORM_INFINITY) { /* max row norm */ 1065 PetscCall(MatNorm(mdn->A, type, nrm)); 1066 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1067 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1068 } 1069 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1070 PetscFunctionReturn(PETSC_SUCCESS); 1071 } 1072 1073 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1074 { 1075 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1076 Mat B; 1077 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1078 PetscInt j, i, lda; 1079 PetscScalar *v; 1080 1081 PetscFunctionBegin; 1082 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1083 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1084 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1085 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1086 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1087 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1088 } else B = *matout; 1089 1090 m = a->A->rmap->n; 1091 n = a->A->cmap->n; 1092 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1093 PetscCall(MatDenseGetLDA(a->A, &lda)); 1094 PetscCall(PetscMalloc1(m, &rwork)); 1095 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1096 for (j = 0; j < n; j++) { 1097 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1098 v = PetscSafePointerPlusOffset(v, lda); 1099 } 1100 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1101 PetscCall(PetscFree(rwork)); 1102 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1103 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1104 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1105 *matout = B; 1106 } else { 1107 PetscCall(MatHeaderMerge(A, &B)); 1108 } 1109 PetscFunctionReturn(PETSC_SUCCESS); 1110 } 1111 1112 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1113 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1114 1115 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1116 { 1117 PetscFunctionBegin; 1118 PetscCall(PetscLayoutSetUp(A->rmap)); 1119 PetscCall(PetscLayoutSetUp(A->cmap)); 1120 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1121 PetscFunctionReturn(PETSC_SUCCESS); 1122 } 1123 1124 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1125 { 1126 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1127 1128 PetscFunctionBegin; 1129 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1130 PetscFunctionReturn(PETSC_SUCCESS); 1131 } 1132 1133 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1134 { 1135 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1136 1137 PetscFunctionBegin; 1138 PetscCall(MatConjugate(a->A)); 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1143 { 1144 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1145 1146 PetscFunctionBegin; 1147 PetscCall(MatRealPart(a->A)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1152 { 1153 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1154 1155 PetscFunctionBegin; 1156 PetscCall(MatImaginaryPart(a->A)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1161 { 1162 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1163 1164 PetscFunctionBegin; 1165 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1166 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1167 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1168 PetscFunctionReturn(PETSC_SUCCESS); 1169 } 1170 1171 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1172 1173 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1174 { 1175 PetscInt i, m, n; 1176 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1177 1178 PetscFunctionBegin; 1179 PetscCall(MatGetSize(A, &m, &n)); 1180 if (type == REDUCTION_MEAN_REALPART) { 1181 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions)); 1182 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1183 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions)); 1184 } else { 1185 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions)); 1186 } 1187 if (type == NORM_2) { 1188 for (i = 0; i < n; i++) reductions[i] *= reductions[i]; 1189 } 1190 if (type == NORM_INFINITY) { 1191 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1192 } else { 1193 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1194 } 1195 if (type == NORM_2) { 1196 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1197 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1198 for (i = 0; i < n; i++) reductions[i] /= m; 1199 } 1200 PetscFunctionReturn(PETSC_SUCCESS); 1201 } 1202 1203 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1204 { 1205 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1206 1207 PetscFunctionBegin; 1208 PetscCall(MatSetRandom(d->A, rctx)); 1209 #if defined(PETSC_HAVE_DEVICE) 1210 x->offloadmask = d->A->offloadmask; 1211 #endif 1212 PetscFunctionReturn(PETSC_SUCCESS); 1213 } 1214 1215 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1216 { 1217 PetscFunctionBegin; 1218 *missing = PETSC_FALSE; 1219 PetscFunctionReturn(PETSC_SUCCESS); 1220 } 1221 1222 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1223 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1224 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1225 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1226 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1227 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1228 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1229 1230 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1231 MatGetRow_MPIDense, 1232 MatRestoreRow_MPIDense, 1233 MatMult_MPIDense, 1234 /* 4*/ MatMultAdd_MPIDense, 1235 MatMultTranspose_MPIDense, 1236 MatMultTransposeAdd_MPIDense, 1237 NULL, 1238 NULL, 1239 NULL, 1240 /* 10*/ NULL, 1241 NULL, 1242 NULL, 1243 NULL, 1244 MatTranspose_MPIDense, 1245 /* 15*/ MatGetInfo_MPIDense, 1246 MatEqual_MPIDense, 1247 MatGetDiagonal_MPIDense, 1248 MatDiagonalScale_MPIDense, 1249 MatNorm_MPIDense, 1250 /* 20*/ MatAssemblyBegin_MPIDense, 1251 MatAssemblyEnd_MPIDense, 1252 MatSetOption_MPIDense, 1253 MatZeroEntries_MPIDense, 1254 /* 24*/ MatZeroRows_MPIDense, 1255 NULL, 1256 NULL, 1257 NULL, 1258 NULL, 1259 /* 29*/ MatSetUp_MPIDense, 1260 NULL, 1261 NULL, 1262 MatGetDiagonalBlock_MPIDense, 1263 NULL, 1264 /* 34*/ MatDuplicate_MPIDense, 1265 NULL, 1266 NULL, 1267 NULL, 1268 NULL, 1269 /* 39*/ MatAXPY_MPIDense, 1270 MatCreateSubMatrices_MPIDense, 1271 NULL, 1272 MatGetValues_MPIDense, 1273 MatCopy_MPIDense, 1274 /* 44*/ NULL, 1275 MatScale_MPIDense, 1276 MatShift_MPIDense, 1277 NULL, 1278 NULL, 1279 /* 49*/ MatSetRandom_MPIDense, 1280 NULL, 1281 NULL, 1282 NULL, 1283 NULL, 1284 /* 54*/ NULL, 1285 NULL, 1286 NULL, 1287 NULL, 1288 NULL, 1289 /* 59*/ MatCreateSubMatrix_MPIDense, 1290 MatDestroy_MPIDense, 1291 MatView_MPIDense, 1292 NULL, 1293 NULL, 1294 /* 64*/ NULL, 1295 NULL, 1296 NULL, 1297 NULL, 1298 NULL, 1299 /* 69*/ NULL, 1300 NULL, 1301 NULL, 1302 NULL, 1303 NULL, 1304 /* 74*/ NULL, 1305 NULL, 1306 NULL, 1307 NULL, 1308 MatLoad_MPIDense, 1309 /* 79*/ NULL, 1310 NULL, 1311 NULL, 1312 NULL, 1313 /* 83*/ NULL, 1314 NULL, 1315 NULL, 1316 NULL, 1317 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1318 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1319 /* 89*/ NULL, 1320 MatProductSetFromOptions_MPIDense, 1321 NULL, 1322 NULL, 1323 MatConjugate_MPIDense, 1324 /* 94*/ NULL, 1325 NULL, 1326 MatRealPart_MPIDense, 1327 MatImaginaryPart_MPIDense, 1328 NULL, 1329 /*99*/ NULL, 1330 NULL, 1331 NULL, 1332 NULL, 1333 MatGetColumnVector_MPIDense, 1334 /*104*/ MatMissingDiagonal_MPIDense, 1335 NULL, 1336 NULL, 1337 NULL, 1338 NULL, 1339 /*109*/ NULL, 1340 NULL, 1341 NULL, 1342 MatMultHermitianTranspose_MPIDense, 1343 MatMultHermitianTransposeAdd_MPIDense, 1344 /*114*/ NULL, 1345 NULL, 1346 MatGetColumnReductions_MPIDense, 1347 NULL, 1348 NULL, 1349 NULL, 1350 /*120*/ NULL, 1351 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1352 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1353 NULL, 1354 /*124*/ NULL, 1355 NULL, 1356 NULL, 1357 NULL, 1358 NULL, 1359 /*129*/ NULL, 1360 NULL, 1361 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1362 NULL, 1363 NULL, 1364 /*134*/ NULL, 1365 NULL, 1366 NULL, 1367 NULL, 1368 NULL, 1369 /*139*/ NULL, 1370 NULL, 1371 NULL, 1372 NULL, 1373 NULL}; 1374 1375 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1376 { 1377 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1378 MatType mtype = MATSEQDENSE; 1379 1380 PetscFunctionBegin; 1381 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1382 PetscCall(PetscLayoutSetUp(mat->rmap)); 1383 PetscCall(PetscLayoutSetUp(mat->cmap)); 1384 if (!a->A) { 1385 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1386 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1387 } 1388 #if defined(PETSC_HAVE_CUDA) 1389 PetscBool iscuda; 1390 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1391 if (iscuda) mtype = MATSEQDENSECUDA; 1392 #endif 1393 #if defined(PETSC_HAVE_HIP) 1394 PetscBool iship; 1395 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1396 if (iship) mtype = MATSEQDENSEHIP; 1397 #endif 1398 PetscCall(MatSetType(a->A, mtype)); 1399 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1400 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1401 mat->offloadmask = a->A->offloadmask; 1402 #endif 1403 mat->preallocated = PETSC_TRUE; 1404 mat->assembled = PETSC_TRUE; 1405 PetscFunctionReturn(PETSC_SUCCESS); 1406 } 1407 1408 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1409 { 1410 Mat B, C; 1411 1412 PetscFunctionBegin; 1413 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1414 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1415 PetscCall(MatDestroy(&C)); 1416 if (reuse == MAT_REUSE_MATRIX) { 1417 C = *newmat; 1418 } else C = NULL; 1419 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1420 PetscCall(MatDestroy(&B)); 1421 if (reuse == MAT_INPLACE_MATRIX) { 1422 PetscCall(MatHeaderReplace(A, &C)); 1423 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1424 PetscFunctionReturn(PETSC_SUCCESS); 1425 } 1426 1427 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1428 { 1429 Mat B, C; 1430 1431 PetscFunctionBegin; 1432 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1433 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1434 if (reuse == MAT_REUSE_MATRIX) { 1435 C = *newmat; 1436 } else C = NULL; 1437 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1438 PetscCall(MatDestroy(&B)); 1439 if (reuse == MAT_INPLACE_MATRIX) { 1440 PetscCall(MatHeaderReplace(A, &C)); 1441 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1442 PetscFunctionReturn(PETSC_SUCCESS); 1443 } 1444 1445 #if defined(PETSC_HAVE_ELEMENTAL) 1446 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1447 { 1448 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1449 Mat mat_elemental; 1450 PetscScalar *v; 1451 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1452 1453 PetscFunctionBegin; 1454 if (reuse == MAT_REUSE_MATRIX) { 1455 mat_elemental = *newmat; 1456 PetscCall(MatZeroEntries(*newmat)); 1457 } else { 1458 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1459 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1460 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1461 PetscCall(MatSetUp(mat_elemental)); 1462 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1463 } 1464 1465 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1466 for (i = 0; i < N; i++) cols[i] = i; 1467 for (i = 0; i < m; i++) rows[i] = rstart + i; 1468 1469 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1470 PetscCall(MatDenseGetArray(A, &v)); 1471 PetscCall(MatDenseGetLDA(a->A, &lda)); 1472 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1473 else { 1474 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1475 } 1476 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1477 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1478 PetscCall(MatDenseRestoreArray(A, &v)); 1479 PetscCall(PetscFree2(rows, cols)); 1480 1481 if (reuse == MAT_INPLACE_MATRIX) { 1482 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1483 } else { 1484 *newmat = mat_elemental; 1485 } 1486 PetscFunctionReturn(PETSC_SUCCESS); 1487 } 1488 #endif 1489 1490 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1491 { 1492 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1493 1494 PetscFunctionBegin; 1495 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1500 { 1501 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1502 1503 PetscFunctionBegin; 1504 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1505 PetscFunctionReturn(PETSC_SUCCESS); 1506 } 1507 1508 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1509 { 1510 Mat_MPIDense *mat; 1511 PetscInt m, nloc, N; 1512 1513 PetscFunctionBegin; 1514 PetscCall(MatGetSize(inmat, &m, &N)); 1515 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1516 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1517 PetscInt sum; 1518 1519 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1520 /* Check sum(n) = N */ 1521 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1522 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1523 1524 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1525 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1526 } 1527 1528 /* numeric phase */ 1529 mat = (Mat_MPIDense *)(*outmat)->data; 1530 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1531 PetscFunctionReturn(PETSC_SUCCESS); 1532 } 1533 1534 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1535 { 1536 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1537 PetscInt lda; 1538 1539 PetscFunctionBegin; 1540 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1541 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1542 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1543 a->vecinuse = col + 1; 1544 PetscCall(MatDenseGetLDA(a->A, &lda)); 1545 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1546 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1547 *v = a->cvec; 1548 PetscFunctionReturn(PETSC_SUCCESS); 1549 } 1550 1551 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1552 { 1553 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1554 1555 PetscFunctionBegin; 1556 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1557 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1558 VecCheckAssembled(a->cvec); 1559 a->vecinuse = 0; 1560 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1561 PetscCall(VecResetArray(a->cvec)); 1562 if (v) *v = NULL; 1563 PetscFunctionReturn(PETSC_SUCCESS); 1564 } 1565 1566 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1567 { 1568 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1569 PetscInt lda; 1570 1571 PetscFunctionBegin; 1572 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1573 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1574 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1575 a->vecinuse = col + 1; 1576 PetscCall(MatDenseGetLDA(a->A, &lda)); 1577 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1578 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1579 PetscCall(VecLockReadPush(a->cvec)); 1580 *v = a->cvec; 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 } 1583 1584 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1585 { 1586 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1587 1588 PetscFunctionBegin; 1589 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1590 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1591 VecCheckAssembled(a->cvec); 1592 a->vecinuse = 0; 1593 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1594 PetscCall(VecLockReadPop(a->cvec)); 1595 PetscCall(VecResetArray(a->cvec)); 1596 if (v) *v = NULL; 1597 PetscFunctionReturn(PETSC_SUCCESS); 1598 } 1599 1600 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1601 { 1602 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1603 PetscInt lda; 1604 1605 PetscFunctionBegin; 1606 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1607 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1608 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1609 a->vecinuse = col + 1; 1610 PetscCall(MatDenseGetLDA(a->A, &lda)); 1611 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1612 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1613 *v = a->cvec; 1614 PetscFunctionReturn(PETSC_SUCCESS); 1615 } 1616 1617 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1618 { 1619 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1620 1621 PetscFunctionBegin; 1622 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1623 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1624 VecCheckAssembled(a->cvec); 1625 a->vecinuse = 0; 1626 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1627 PetscCall(VecResetArray(a->cvec)); 1628 if (v) *v = NULL; 1629 PetscFunctionReturn(PETSC_SUCCESS); 1630 } 1631 1632 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1633 { 1634 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1635 Mat_MPIDense *c; 1636 MPI_Comm comm; 1637 PetscInt prbegin, prend, pcbegin, pcend; 1638 1639 PetscFunctionBegin; 1640 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1641 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1642 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1643 prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1644 prend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1645 pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart); 1646 pcend = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart)); 1647 if (!a->cmat) { 1648 PetscCall(MatCreate(comm, &a->cmat)); 1649 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1650 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1651 else { 1652 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1653 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1654 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1655 } 1656 if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap)); 1657 else { 1658 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1659 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1660 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1661 } 1662 c = (Mat_MPIDense *)a->cmat->data; 1663 c->sub_rbegin = rbegin; 1664 c->sub_rend = rend; 1665 c->sub_cbegin = cbegin; 1666 c->sub_cend = cend; 1667 } 1668 c = (Mat_MPIDense *)a->cmat->data; 1669 if (c->sub_rbegin != rbegin || c->sub_rend != rend) { 1670 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1671 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1672 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1673 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1674 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1675 c->sub_rbegin = rbegin; 1676 c->sub_rend = rend; 1677 } 1678 if (c->sub_cbegin != cbegin || c->sub_cend != cend) { 1679 // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary 1680 if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) { 1681 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1682 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1683 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1684 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1685 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1686 PetscCall(VecDestroy(&c->lvec)); 1687 PetscCall(PetscSFDestroy(&c->Mvctx)); 1688 } 1689 c->sub_cbegin = cbegin; 1690 c->sub_cend = cend; 1691 } 1692 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1693 PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A)); 1694 1695 a->cmat->preallocated = PETSC_TRUE; 1696 a->cmat->assembled = PETSC_TRUE; 1697 #if defined(PETSC_HAVE_DEVICE) 1698 a->cmat->offloadmask = c->A->offloadmask; 1699 #endif 1700 a->matinuse = cbegin + 1; 1701 *v = a->cmat; 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1706 { 1707 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1708 Mat_MPIDense *c; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1712 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1713 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1714 a->matinuse = 0; 1715 c = (Mat_MPIDense *)a->cmat->data; 1716 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1717 if (v) *v = NULL; 1718 #if defined(PETSC_HAVE_DEVICE) 1719 A->offloadmask = a->A->offloadmask; 1720 #endif 1721 PetscFunctionReturn(PETSC_SUCCESS); 1722 } 1723 1724 /*MC 1725 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1726 1727 Options Database Key: 1728 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1729 1730 Level: beginner 1731 1732 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1733 M*/ 1734 PetscErrorCode MatCreate_MPIDense(Mat mat) 1735 { 1736 Mat_MPIDense *a; 1737 1738 PetscFunctionBegin; 1739 PetscCall(PetscNew(&a)); 1740 mat->data = (void *)a; 1741 mat->ops[0] = MatOps_Values; 1742 1743 mat->insertmode = NOT_SET_VALUES; 1744 1745 /* build cache for off array entries formed */ 1746 a->donotstash = PETSC_FALSE; 1747 1748 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1749 1750 /* stuff used for matrix vector multiply */ 1751 a->lvec = NULL; 1752 a->Mvctx = NULL; 1753 a->roworiented = PETSC_TRUE; 1754 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1776 #if defined(PETSC_HAVE_ELEMENTAL) 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1778 #endif 1779 #if defined(PETSC_HAVE_SCALAPACK) 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1781 #endif 1782 #if defined(PETSC_HAVE_CUDA) 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1784 #endif 1785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1788 #if defined(PETSC_HAVE_CUDA) 1789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1791 #endif 1792 #if defined(PETSC_HAVE_HIP) 1793 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1794 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1795 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1796 #endif 1797 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1798 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1799 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense)); 1800 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1801 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1802 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1803 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 /*MC 1808 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1809 1810 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1811 and `MATMPIDENSE` otherwise. 1812 1813 Options Database Key: 1814 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1815 1816 Level: beginner 1817 1818 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1819 M*/ 1820 1821 /*@ 1822 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1823 1824 Collective 1825 1826 Input Parameters: 1827 + B - the matrix 1828 - data - optional location of matrix data. Set to `NULL` for PETSc 1829 to control all matrix memory allocation. 1830 1831 Level: intermediate 1832 1833 Notes: 1834 The dense format is fully compatible with standard Fortran 1835 storage by columns. 1836 1837 The data input variable is intended primarily for Fortran programmers 1838 who wish to allocate their own matrix memory space. Most users should 1839 set `data` to `NULL`. 1840 1841 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1842 @*/ 1843 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1844 { 1845 PetscFunctionBegin; 1846 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1847 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1848 PetscFunctionReturn(PETSC_SUCCESS); 1849 } 1850 1851 /*@ 1852 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1853 array provided by the user. This is useful to avoid copying an array 1854 into a matrix 1855 1856 Not Collective 1857 1858 Input Parameters: 1859 + mat - the matrix 1860 - array - the array in column major order 1861 1862 Level: developer 1863 1864 Note: 1865 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1866 1867 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1868 freed when the matrix is destroyed. 1869 1870 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1871 `MatDenseReplaceArray()` 1872 @*/ 1873 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1874 { 1875 PetscFunctionBegin; 1876 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1877 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1878 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1879 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1880 mat->offloadmask = PETSC_OFFLOAD_CPU; 1881 #endif 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 /*@ 1886 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1887 1888 Not Collective 1889 1890 Input Parameter: 1891 . mat - the matrix 1892 1893 Level: developer 1894 1895 Note: 1896 You can only call this after a call to `MatDensePlaceArray()` 1897 1898 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1899 @*/ 1900 PetscErrorCode MatDenseResetArray(Mat mat) 1901 { 1902 PetscFunctionBegin; 1903 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1904 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1905 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1906 PetscFunctionReturn(PETSC_SUCCESS); 1907 } 1908 1909 /*@ 1910 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1911 array provided by the user. This is useful to avoid copying an array 1912 into a matrix 1913 1914 Not Collective 1915 1916 Input Parameters: 1917 + mat - the matrix 1918 - array - the array in column major order 1919 1920 Level: developer 1921 1922 Note: 1923 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1924 1925 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1926 freed by the user. It will be freed when the matrix is destroyed. 1927 1928 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1929 @*/ 1930 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1931 { 1932 PetscFunctionBegin; 1933 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1934 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1935 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1936 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1937 mat->offloadmask = PETSC_OFFLOAD_CPU; 1938 #endif 1939 PetscFunctionReturn(PETSC_SUCCESS); 1940 } 1941 1942 /*@ 1943 MatCreateDense - Creates a matrix in `MATDENSE` format. 1944 1945 Collective 1946 1947 Input Parameters: 1948 + comm - MPI communicator 1949 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1950 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1951 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1952 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1953 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc 1954 to control all matrix memory allocation. 1955 1956 Output Parameter: 1957 . A - the matrix 1958 1959 Level: intermediate 1960 1961 Notes: 1962 The dense format is fully compatible with standard Fortran 1963 storage by columns. 1964 1965 Although local portions of the matrix are stored in column-major 1966 order, the matrix is partitioned across MPI ranks by row. 1967 1968 The data input variable is intended primarily for Fortran programmers 1969 who wish to allocate their own matrix memory space. Most users should 1970 set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users). 1971 1972 The user MUST specify either the local or global matrix dimensions 1973 (possibly both). 1974 1975 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1976 @*/ 1977 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A) 1978 { 1979 PetscFunctionBegin; 1980 PetscCall(MatCreate(comm, A)); 1981 PetscCall(MatSetSizes(*A, m, n, M, N)); 1982 PetscCall(MatSetType(*A, MATDENSE)); 1983 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1984 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1985 PetscFunctionReturn(PETSC_SUCCESS); 1986 } 1987 1988 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1989 { 1990 Mat mat; 1991 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1992 1993 PetscFunctionBegin; 1994 *newmat = NULL; 1995 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1996 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1997 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1998 a = (Mat_MPIDense *)mat->data; 1999 2000 mat->factortype = A->factortype; 2001 mat->assembled = PETSC_TRUE; 2002 mat->preallocated = PETSC_TRUE; 2003 2004 mat->insertmode = NOT_SET_VALUES; 2005 a->donotstash = oldmat->donotstash; 2006 2007 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 2008 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 2009 2010 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2011 2012 *newmat = mat; 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2017 { 2018 PetscBool isbinary; 2019 #if defined(PETSC_HAVE_HDF5) 2020 PetscBool ishdf5; 2021 #endif 2022 2023 PetscFunctionBegin; 2024 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2025 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2026 /* force binary viewer to load .info file if it has not yet done so */ 2027 PetscCall(PetscViewerSetUp(viewer)); 2028 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2029 #if defined(PETSC_HAVE_HDF5) 2030 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2031 #endif 2032 if (isbinary) { 2033 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2034 #if defined(PETSC_HAVE_HDF5) 2035 } else if (ishdf5) { 2036 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2037 #endif 2038 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2039 PetscFunctionReturn(PETSC_SUCCESS); 2040 } 2041 2042 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2043 { 2044 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2045 Mat a, b; 2046 2047 PetscFunctionBegin; 2048 a = matA->A; 2049 b = matB->A; 2050 PetscCall(MatEqual(a, b, flag)); 2051 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2056 { 2057 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2058 2059 PetscFunctionBegin; 2060 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2061 PetscCall(MatDestroy(&atb->atb)); 2062 PetscCall(PetscFree(atb)); 2063 PetscFunctionReturn(PETSC_SUCCESS); 2064 } 2065 2066 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2067 { 2068 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2069 2070 PetscFunctionBegin; 2071 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2072 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2073 PetscCall(PetscFree(abt)); 2074 PetscFunctionReturn(PETSC_SUCCESS); 2075 } 2076 2077 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2078 { 2079 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2080 Mat_TransMatMultDense *atb; 2081 MPI_Comm comm; 2082 PetscMPIInt size, *recvcounts; 2083 PetscScalar *carray, *sendbuf; 2084 const PetscScalar *atbarray; 2085 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2086 const PetscInt *ranges; 2087 2088 PetscFunctionBegin; 2089 MatCheckProduct(C, 3); 2090 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2091 atb = (Mat_TransMatMultDense *)C->product->data; 2092 recvcounts = atb->recvcounts; 2093 sendbuf = atb->sendbuf; 2094 2095 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2096 PetscCallMPI(MPI_Comm_size(comm, &size)); 2097 2098 /* compute atbarray = aseq^T * bseq */ 2099 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2100 2101 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2102 2103 if (ranges[1] == C->rmap->N) { 2104 /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */ 2105 PetscInt atb_lda, c_lda; 2106 Mat atb_local = atb->atb; 2107 Mat atb_alloc = NULL; 2108 Mat c_local = c->A; 2109 Mat c_alloc = NULL; 2110 PetscMemType atb_memtype, c_memtype; 2111 const PetscScalar *atb_array = NULL; 2112 MPI_Datatype vector_type; 2113 PetscScalar *c_array = NULL; 2114 PetscMPIInt rank; 2115 2116 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2117 2118 PetscCall(MatDenseGetLDA(atb_local, &atb_lda)); 2119 if (atb_lda != C->rmap->N) { 2120 // copy atb to a matrix that will have lda == the number of rows 2121 PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc)); 2122 PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN)); 2123 atb_local = atb_alloc; 2124 } 2125 2126 if (rank == 0) { 2127 PetscCall(MatDenseGetLDA(c_local, &c_lda)); 2128 if (c_lda != C->rmap->N) { 2129 // copy c to a matrix that will have lda == the number of rows 2130 PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc)); 2131 c_local = c_alloc; 2132 } 2133 PetscCall(MatZeroEntries(c_local)); 2134 } 2135 /* atb_local and c_local have nrows = lda = A->cmap->N and ncols = 2136 * B->cmap->N: use the a->Mvctx to use the best reduction method */ 2137 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 2138 vector_type = MPIU_SCALAR; 2139 if (B->cmap->N > 1) { 2140 PetscMPIInt mpi_N; 2141 2142 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2143 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2144 PetscCallMPI(MPI_Type_commit(&vector_type)); 2145 } 2146 PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype)); 2147 PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype)); 2148 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM)); 2149 PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM)); 2150 PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array)); 2151 PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array)); 2152 if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN)); 2153 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2154 PetscCall(MatDestroy(&atb_alloc)); 2155 PetscCall(MatDestroy(&c_alloc)); 2156 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2157 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2158 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2159 PetscFunctionReturn(PETSC_SUCCESS); 2160 } 2161 2162 /* arrange atbarray into sendbuf */ 2163 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2164 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2165 for (proc = 0, k = 0; proc < size; proc++) { 2166 for (j = 0; j < cN; j++) { 2167 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2168 } 2169 } 2170 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2171 2172 /* sum all atbarray to local values of C */ 2173 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2174 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2175 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2176 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2177 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2178 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2179 PetscFunctionReturn(PETSC_SUCCESS); 2180 } 2181 2182 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2183 { 2184 MPI_Comm comm; 2185 PetscMPIInt size; 2186 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2187 Mat_TransMatMultDense *atb; 2188 PetscBool cisdense = PETSC_FALSE; 2189 const PetscInt *ranges; 2190 2191 PetscFunctionBegin; 2192 MatCheckProduct(C, 4); 2193 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2194 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2195 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, 2196 A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2197 2198 /* create matrix product C */ 2199 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2200 #if defined(PETSC_HAVE_CUDA) 2201 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2202 #endif 2203 #if defined(PETSC_HAVE_HIP) 2204 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2205 #endif 2206 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2207 PetscCall(MatSetUp(C)); 2208 2209 /* create data structure for reuse C */ 2210 PetscCallMPI(MPI_Comm_size(comm, &size)); 2211 PetscCall(PetscNew(&atb)); 2212 cM = C->rmap->N; 2213 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2214 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2215 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2216 C->product->data = atb; 2217 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2218 PetscFunctionReturn(PETSC_SUCCESS); 2219 } 2220 2221 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2222 { 2223 MPI_Comm comm; 2224 PetscMPIInt i, size; 2225 PetscInt maxRows, bufsiz; 2226 PetscMPIInt tag; 2227 PetscInt alg; 2228 Mat_MatTransMultDense *abt; 2229 Mat_Product *product = C->product; 2230 PetscBool flg; 2231 2232 PetscFunctionBegin; 2233 MatCheckProduct(C, 4); 2234 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2235 /* check local size of A and B */ 2236 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2237 2238 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2239 alg = flg ? 0 : 1; 2240 2241 /* setup matrix product C */ 2242 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2243 PetscCall(MatSetType(C, MATMPIDENSE)); 2244 PetscCall(MatSetUp(C)); 2245 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2246 2247 /* create data structure for reuse C */ 2248 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2249 PetscCallMPI(MPI_Comm_size(comm, &size)); 2250 PetscCall(PetscNew(&abt)); 2251 abt->tag = tag; 2252 abt->alg = alg; 2253 switch (alg) { 2254 case 1: /* alg: "cyclic" */ 2255 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2256 bufsiz = A->cmap->N * maxRows; 2257 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2258 break; 2259 default: /* alg: "allgatherv" */ 2260 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2261 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2262 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2263 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2264 break; 2265 } 2266 2267 C->product->data = abt; 2268 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2269 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2270 PetscFunctionReturn(PETSC_SUCCESS); 2271 } 2272 2273 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2274 { 2275 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2276 Mat_MatTransMultDense *abt; 2277 MPI_Comm comm; 2278 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2279 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2280 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2281 PetscScalar _DOne = 1.0, _DZero = 0.0; 2282 const PetscScalar *av, *bv; 2283 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2284 MPI_Request reqs[2]; 2285 const PetscInt *ranges; 2286 2287 PetscFunctionBegin; 2288 MatCheckProduct(C, 3); 2289 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2290 abt = (Mat_MatTransMultDense *)C->product->data; 2291 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2292 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2293 PetscCallMPI(MPI_Comm_size(comm, &size)); 2294 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2295 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2296 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2297 PetscCall(MatDenseGetLDA(a->A, &i)); 2298 PetscCall(PetscBLASIntCast(i, &alda)); 2299 PetscCall(MatDenseGetLDA(b->A, &i)); 2300 PetscCall(PetscBLASIntCast(i, &blda)); 2301 PetscCall(MatDenseGetLDA(c->A, &i)); 2302 PetscCall(PetscBLASIntCast(i, &clda)); 2303 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2304 bn = B->rmap->n; 2305 if (blda == bn) { 2306 sendbuf = (PetscScalar *)bv; 2307 } else { 2308 sendbuf = abt->buf[0]; 2309 for (k = 0, i = 0; i < cK; i++) { 2310 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2311 } 2312 } 2313 if (size > 1) { 2314 sendto = (rank + size - 1) % size; 2315 recvfrom = (rank + size + 1) % size; 2316 } else { 2317 sendto = recvfrom = 0; 2318 } 2319 PetscCall(PetscBLASIntCast(cK, &ck)); 2320 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2321 recvisfrom = rank; 2322 for (i = 0; i < size; i++) { 2323 /* we have finished receiving in sending, bufs can be read/modified */ 2324 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2325 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2326 2327 if (nextrecvisfrom != rank) { 2328 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2329 sendsiz = cK * bn; 2330 recvsiz = cK * nextbn; 2331 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2332 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2333 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2334 } 2335 2336 /* local aseq * sendbuf^T */ 2337 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2338 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2339 2340 if (nextrecvisfrom != rank) { 2341 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2342 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2343 } 2344 bn = nextbn; 2345 recvisfrom = nextrecvisfrom; 2346 sendbuf = recvbuf; 2347 } 2348 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2349 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2350 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2351 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2352 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2353 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2354 PetscFunctionReturn(PETSC_SUCCESS); 2355 } 2356 2357 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2358 { 2359 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2360 Mat_MatTransMultDense *abt; 2361 MPI_Comm comm; 2362 PetscMPIInt size, ibn; 2363 PetscScalar *cv, *sendbuf, *recvbuf; 2364 const PetscScalar *av, *bv; 2365 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2366 PetscScalar _DOne = 1.0, _DZero = 0.0; 2367 PetscBLASInt cm, cn, ck, alda, clda; 2368 2369 PetscFunctionBegin; 2370 MatCheckProduct(C, 3); 2371 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2372 abt = (Mat_MatTransMultDense *)C->product->data; 2373 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2374 PetscCallMPI(MPI_Comm_size(comm, &size)); 2375 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2376 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2377 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2378 PetscCall(MatDenseGetLDA(a->A, &i)); 2379 PetscCall(PetscBLASIntCast(i, &alda)); 2380 PetscCall(MatDenseGetLDA(b->A, &blda)); 2381 PetscCall(MatDenseGetLDA(c->A, &i)); 2382 PetscCall(PetscBLASIntCast(i, &clda)); 2383 /* copy transpose of B into buf[0] */ 2384 bn = B->rmap->n; 2385 sendbuf = abt->buf[0]; 2386 recvbuf = abt->buf[1]; 2387 for (k = 0, j = 0; j < bn; j++) { 2388 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2389 } 2390 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2391 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2392 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2393 PetscCall(PetscBLASIntCast(cK, &ck)); 2394 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2395 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2396 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2397 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2398 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2399 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2400 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2401 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2402 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2403 PetscFunctionReturn(PETSC_SUCCESS); 2404 } 2405 2406 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2407 { 2408 Mat_MatTransMultDense *abt; 2409 2410 PetscFunctionBegin; 2411 MatCheckProduct(C, 3); 2412 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2413 abt = (Mat_MatTransMultDense *)C->product->data; 2414 switch (abt->alg) { 2415 case 1: 2416 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2417 break; 2418 default: 2419 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2420 break; 2421 } 2422 PetscFunctionReturn(PETSC_SUCCESS); 2423 } 2424 2425 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2426 { 2427 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2428 2429 PetscFunctionBegin; 2430 PetscCall(MatDestroy(&ab->Ce)); 2431 PetscCall(MatDestroy(&ab->Ae)); 2432 PetscCall(MatDestroy(&ab->Be)); 2433 PetscCall(PetscFree(ab)); 2434 PetscFunctionReturn(PETSC_SUCCESS); 2435 } 2436 2437 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2438 { 2439 Mat_MatMultDense *ab; 2440 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2441 Mat_MPIDense *b = (Mat_MPIDense *)B->data; 2442 2443 PetscFunctionBegin; 2444 MatCheckProduct(C, 3); 2445 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2446 ab = (Mat_MatMultDense *)C->product->data; 2447 if (ab->Ae && ab->Ce) { 2448 #if PetscDefined(HAVE_ELEMENTAL) 2449 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2450 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2451 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2452 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2453 #else 2454 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2455 #endif 2456 } else { 2457 MPI_Comm comm; 2458 const PetscScalar *read; 2459 PetscScalar *write; 2460 PetscInt lda; 2461 const PetscInt *ranges; 2462 PetscMPIInt size; 2463 2464 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2465 comm = PetscObjectComm((PetscObject)B); 2466 PetscCallMPI(MPI_Comm_size(comm, &size)); 2467 PetscCall(PetscLayoutGetRanges(B->rmap, &ranges)); 2468 if (ranges[1] == ranges[size]) { 2469 // optimize for the case where the B matrix is broadcast from rank 0 2470 PetscInt b_lda, be_lda; 2471 Mat b_local = b->A; 2472 Mat b_alloc = NULL; 2473 Mat be_local = ab->Be; 2474 Mat be_alloc = NULL; 2475 PetscMemType b_memtype, be_memtype; 2476 const PetscScalar *b_array = NULL; 2477 MPI_Datatype vector_type; 2478 PetscScalar *be_array = NULL; 2479 PetscMPIInt rank; 2480 2481 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2482 PetscCall(MatDenseGetLDA(be_local, &be_lda)); 2483 if (be_lda != B->rmap->N) { 2484 PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc)); 2485 be_local = be_alloc; 2486 } 2487 2488 if (rank == 0) { 2489 PetscCall(MatDenseGetLDA(b_local, &b_lda)); 2490 if (b_lda != B->rmap->N) { 2491 PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc)); 2492 PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN)); 2493 b_local = b_alloc; 2494 } 2495 } 2496 vector_type = MPIU_SCALAR; 2497 if (B->cmap->N > 1) { 2498 PetscMPIInt mpi_N; 2499 2500 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2501 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2502 PetscCallMPI(MPI_Type_commit(&vector_type)); 2503 } 2504 PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype)); 2505 PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype)); 2506 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE)); 2507 PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE)); 2508 PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array)); 2509 PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array)); 2510 if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN)); 2511 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2512 PetscCall(MatDestroy(&be_alloc)); 2513 PetscCall(MatDestroy(&b_alloc)); 2514 } else { 2515 PetscCall(MatDenseGetLDA(B, &lda)); 2516 PetscCall(MatDenseGetArrayRead(B, &read)); 2517 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2518 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2519 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2520 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2521 } 2522 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2523 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2524 } 2525 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2526 } 2527 PetscFunctionReturn(PETSC_SUCCESS); 2528 } 2529 2530 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2531 { 2532 Mat_Product *product = C->product; 2533 PetscInt alg; 2534 Mat_MatMultDense *ab; 2535 PetscBool flg; 2536 2537 PetscFunctionBegin; 2538 MatCheckProduct(C, 4); 2539 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2540 /* check local size of A and B */ 2541 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2542 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2543 2544 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2545 alg = flg ? 0 : 1; 2546 2547 /* setup C */ 2548 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2549 PetscCall(MatSetType(C, MATMPIDENSE)); 2550 PetscCall(MatSetUp(C)); 2551 2552 /* create data structure for reuse Cdense */ 2553 PetscCall(PetscNew(&ab)); 2554 2555 switch (alg) { 2556 case 1: /* alg: "elemental" */ 2557 #if PetscDefined(HAVE_ELEMENTAL) 2558 /* create elemental matrices Ae and Be */ 2559 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2560 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2561 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2562 PetscCall(MatSetUp(ab->Ae)); 2563 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2564 2565 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2566 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2567 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2568 PetscCall(MatSetUp(ab->Be)); 2569 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2570 2571 /* compute symbolic Ce = Ae*Be */ 2572 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2573 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2574 #else 2575 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2576 #endif 2577 break; 2578 default: /* alg: "petsc" */ 2579 ab->Ae = NULL; 2580 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2581 ab->Ce = NULL; 2582 break; 2583 } 2584 2585 C->product->data = ab; 2586 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2587 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2588 PetscFunctionReturn(PETSC_SUCCESS); 2589 } 2590 2591 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2592 { 2593 Mat_Product *product = C->product; 2594 const char *algTypes[2] = {"petsc", "elemental"}; 2595 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; 2596 PetscBool flg = PETSC_FALSE; 2597 2598 PetscFunctionBegin; 2599 /* Set default algorithm */ 2600 alg = 0; /* default is PETSc */ 2601 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2602 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2603 2604 /* Get runtime option */ 2605 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2606 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2607 PetscOptionsEnd(); 2608 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2609 2610 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2611 C->ops->productsymbolic = MatProductSymbolic_AB; 2612 PetscFunctionReturn(PETSC_SUCCESS); 2613 } 2614 2615 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2616 { 2617 Mat_Product *product = C->product; 2618 Mat A = product->A, B = product->B; 2619 2620 PetscFunctionBegin; 2621 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2622 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2623 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2624 C->ops->productsymbolic = MatProductSymbolic_AtB; 2625 PetscFunctionReturn(PETSC_SUCCESS); 2626 } 2627 2628 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2629 { 2630 Mat_Product *product = C->product; 2631 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2632 PetscInt alg, nalg = 2; 2633 PetscBool flg = PETSC_FALSE; 2634 2635 PetscFunctionBegin; 2636 /* Set default algorithm */ 2637 alg = 0; /* default is allgatherv */ 2638 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2639 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2640 2641 /* Get runtime option */ 2642 if (product->api_user) { 2643 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2644 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2645 PetscOptionsEnd(); 2646 } else { 2647 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2648 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2649 PetscOptionsEnd(); 2650 } 2651 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2652 2653 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2654 C->ops->productsymbolic = MatProductSymbolic_ABt; 2655 PetscFunctionReturn(PETSC_SUCCESS); 2656 } 2657 2658 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2659 { 2660 Mat_Product *product = C->product; 2661 2662 PetscFunctionBegin; 2663 switch (product->type) { 2664 case MATPRODUCT_AB: 2665 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2666 break; 2667 case MATPRODUCT_AtB: 2668 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2669 break; 2670 case MATPRODUCT_ABt: 2671 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2672 break; 2673 default: 2674 break; 2675 } 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678