1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the sequential or MPI `MATDENSE` matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 32 PetscAssertPointer(B, 2); 33 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 34 if (flg) *B = mat->A; 35 else { 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 37 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 47 48 PetscFunctionBegin; 49 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 54 { 55 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 56 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 57 PetscScalar *v; 58 59 PetscFunctionBegin; 60 PetscCall(MatDenseGetArray(mat->A, &v)); 61 PetscCall(MatDenseGetLDA(mat->A, &lda)); 62 rend2 = PetscMin(rend, A->cmap->N); 63 if (rend2 > rstart) { 64 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 65 PetscCall(PetscLogFlops(rend2 - rstart)); 66 } 67 PetscCall(MatDenseRestoreArray(mat->A, &v)); 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 72 { 73 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 74 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 75 76 PetscFunctionBegin; 77 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 78 lrow = row - rstart; 79 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 84 { 85 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 86 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 87 88 PetscFunctionBegin; 89 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 90 lrow = row - rstart; 91 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 96 { 97 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 98 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 99 PetscScalar *array; 100 MPI_Comm comm; 101 PetscBool flg; 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatHasCongruentLayouts(A, &flg)); 106 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 107 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 108 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 109 #if PetscDefined(HAVE_CUDA) 110 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 111 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 112 #elif PetscDefined(HAVE_HIP) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 115 #endif 116 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 117 PetscCall(MatCreate(comm, &B)); 118 PetscCall(MatSetSizes(B, m, m, m, m)); 119 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 120 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 121 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 122 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 123 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 124 *a = B; 125 PetscCall(MatDestroy(&B)); 126 } else *a = B; 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 131 { 132 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 133 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 134 PetscBool roworiented = A->roworiented; 135 136 PetscFunctionBegin; 137 for (i = 0; i < m; i++) { 138 if (idxm[i] < 0) continue; 139 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 140 if (idxm[i] >= rstart && idxm[i] < rend) { 141 row = idxm[i] - rstart; 142 if (roworiented) { 143 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 144 } else { 145 for (j = 0; j < n; j++) { 146 if (idxn[j] < 0) continue; 147 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 148 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 149 } 150 } 151 } else if (!A->donotstash) { 152 mat->assembled = PETSC_FALSE; 153 if (roworiented) { 154 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 155 } else { 156 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 157 } 158 } 159 } 160 PetscFunctionReturn(PETSC_SUCCESS); 161 } 162 163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 164 { 165 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 166 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 167 168 PetscFunctionBegin; 169 for (i = 0; i < m; i++) { 170 if (idxm[i] < 0) continue; /* negative row */ 171 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 172 if (idxm[i] >= rstart && idxm[i] < rend) { 173 row = idxm[i] - rstart; 174 for (j = 0; j < n; j++) { 175 if (idxn[j] < 0) continue; /* negative column */ 176 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 177 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 178 } 179 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 180 } 181 PetscFunctionReturn(PETSC_SUCCESS); 182 } 183 184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 185 { 186 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 187 188 PetscFunctionBegin; 189 PetscCall(MatDenseGetLDA(a->A, lda)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 194 { 195 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 196 MatType mtype = MATSEQDENSE; 197 198 PetscFunctionBegin; 199 if (!a->A) { 200 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 201 PetscCall(PetscLayoutSetUp(A->rmap)); 202 PetscCall(PetscLayoutSetUp(A->cmap)); 203 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 204 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 205 #if PetscDefined(HAVE_CUDA) 206 PetscBool iscuda; 207 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 208 if (iscuda) mtype = MATSEQDENSECUDA; 209 #elif PetscDefined(HAVE_HIP) 210 PetscBool iship; 211 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 212 if (iship) mtype = MATSEQDENSEHIP; 213 #endif 214 PetscCall(MatSetType(a->A, mtype)); 215 } 216 PetscCall(MatDenseSetLDA(a->A, lda)); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 221 { 222 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 223 224 PetscFunctionBegin; 225 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 226 PetscCall(MatDenseGetArray(a->A, array)); 227 PetscFunctionReturn(PETSC_SUCCESS); 228 } 229 230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 231 { 232 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 233 234 PetscFunctionBegin; 235 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 236 PetscCall(MatDenseGetArrayRead(a->A, array)); 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 241 { 242 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 243 244 PetscFunctionBegin; 245 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 246 PetscCall(MatDenseGetArrayWrite(a->A, array)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 251 { 252 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 253 254 PetscFunctionBegin; 255 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 256 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 257 PetscCall(MatDensePlaceArray(a->A, array)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 262 { 263 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 264 265 PetscFunctionBegin; 266 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 267 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 268 PetscCall(MatDenseResetArray(a->A)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 273 { 274 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 275 276 PetscFunctionBegin; 277 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 278 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 279 PetscCall(MatDenseReplaceArray(a->A, array)); 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 284 { 285 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 286 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 287 const PetscInt *irow, *icol; 288 const PetscScalar *v; 289 PetscScalar *bv; 290 Mat newmat; 291 IS iscol_local; 292 MPI_Comm comm_is, comm_mat; 293 294 PetscFunctionBegin; 295 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 296 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 297 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 298 299 PetscCall(ISAllGather(iscol, &iscol_local)); 300 PetscCall(ISGetIndices(isrow, &irow)); 301 PetscCall(ISGetIndices(iscol_local, &icol)); 302 PetscCall(ISGetLocalSize(isrow, &nrows)); 303 PetscCall(ISGetLocalSize(iscol, &ncols)); 304 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 305 306 /* No parallel redistribution currently supported! Should really check each index set 307 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 308 original matrix! */ 309 310 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 311 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 312 313 /* Check submatrix call */ 314 if (scall == MAT_REUSE_MATRIX) { 315 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 316 /* Really need to test rows and column sizes! */ 317 newmat = *B; 318 } else { 319 /* Create and fill new matrix */ 320 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 321 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 322 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 323 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 324 } 325 326 /* Now extract the data pointers and do the copy, column at a time */ 327 newmatd = (Mat_MPIDense *)newmat->data; 328 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 329 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 330 PetscCall(MatDenseGetLDA(mat->A, &lda)); 331 for (i = 0; i < Ncols; i++) { 332 const PetscScalar *av = v + lda * icol[i]; 333 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 334 } 335 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 336 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 337 338 /* Assemble the matrices so that the correct flags are set */ 339 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 340 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 341 342 /* Free work space */ 343 PetscCall(ISRestoreIndices(isrow, &irow)); 344 PetscCall(ISRestoreIndices(iscol_local, &icol)); 345 PetscCall(ISDestroy(&iscol_local)); 346 *B = newmat; 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 353 354 PetscFunctionBegin; 355 PetscCall(MatDenseRestoreArray(a->A, array)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 360 { 361 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 362 363 PetscFunctionBegin; 364 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 365 PetscFunctionReturn(PETSC_SUCCESS); 366 } 367 368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 369 { 370 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 371 372 PetscFunctionBegin; 373 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 378 { 379 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 380 PetscInt nstash, reallocs; 381 382 PetscFunctionBegin; 383 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 384 385 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 386 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 387 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 392 { 393 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 394 PetscInt i, *row, *col, flg, j, rstart, ncols; 395 PetscMPIInt n; 396 PetscScalar *val; 397 398 PetscFunctionBegin; 399 if (!mdn->donotstash && !mat->nooffprocentries) { 400 /* wait on receives */ 401 while (1) { 402 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 403 if (!flg) break; 404 405 for (i = 0; i < n;) { 406 /* Now identify the consecutive vals belonging to the same row */ 407 for (j = i, rstart = row[j]; j < n; j++) { 408 if (row[j] != rstart) break; 409 } 410 if (j < n) ncols = j - i; 411 else ncols = n - i; 412 /* Now assemble all these values with a single function call */ 413 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 414 i = j; 415 } 416 } 417 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 418 } 419 420 PetscCall(MatAssemblyBegin(mdn->A, mode)); 421 PetscCall(MatAssemblyEnd(mdn->A, mode)); 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 426 { 427 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 428 429 PetscFunctionBegin; 430 PetscCall(MatZeroEntries(l->A)); 431 PetscFunctionReturn(PETSC_SUCCESS); 432 } 433 434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 437 PetscInt i, len, *lrows; 438 439 PetscFunctionBegin; 440 /* get locally owned rows */ 441 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 442 /* fix right hand side if needed */ 443 if (x && b) { 444 const PetscScalar *xx; 445 PetscScalar *bb; 446 447 PetscCall(VecGetArrayRead(x, &xx)); 448 PetscCall(VecGetArrayWrite(b, &bb)); 449 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 450 PetscCall(VecRestoreArrayRead(x, &xx)); 451 PetscCall(VecRestoreArrayWrite(b, &bb)); 452 } 453 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 454 if (diag != 0.0) { 455 Vec d; 456 457 PetscCall(MatCreateVecs(A, NULL, &d)); 458 PetscCall(VecSet(d, diag)); 459 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 460 PetscCall(VecDestroy(&d)); 461 } 462 PetscCall(PetscFree(lrows)); 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 467 { 468 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 469 const PetscScalar *ax; 470 PetscScalar *ay; 471 PetscMemType axmtype, aymtype; 472 473 PetscFunctionBegin; 474 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 475 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 476 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 477 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 478 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 479 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 480 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 481 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 482 PetscFunctionReturn(PETSC_SUCCESS); 483 } 484 485 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 486 { 487 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 488 const PetscScalar *ax; 489 PetscScalar *ay; 490 PetscMemType axmtype, aymtype; 491 492 PetscFunctionBegin; 493 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 494 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 495 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 496 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 497 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 498 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 499 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 500 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503 504 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 507 const PetscScalar *ax; 508 PetscScalar *ay; 509 PetscMemType axmtype, aymtype; 510 511 PetscFunctionBegin; 512 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 513 PetscCall(VecSet(yy, 0.0)); 514 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 515 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 516 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 517 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 518 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 519 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 520 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 525 { 526 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 527 const PetscScalar *ax; 528 PetscScalar *ay; 529 PetscMemType axmtype, aymtype; 530 531 PetscFunctionBegin; 532 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 533 PetscCall(VecCopy(yy, zz)); 534 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 535 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 536 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 537 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 538 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 539 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 540 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 541 PetscFunctionReturn(PETSC_SUCCESS); 542 } 543 544 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 545 { 546 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 547 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 548 PetscScalar *x; 549 const PetscScalar *av; 550 551 PetscFunctionBegin; 552 PetscCall(VecGetArray(v, &x)); 553 PetscCall(VecGetSize(v, &ng)); 554 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 555 PetscCall(VecGetLocalSize(v, &nl)); 556 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 557 radd = A->rmap->rstart * m; 558 PetscCall(MatDenseGetArrayRead(a->A, &av)); 559 PetscCall(MatDenseGetLDA(a->A, &lda)); 560 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 561 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 562 PetscCall(PetscArrayzero(x + i, nl - i)); 563 PetscCall(VecRestoreArray(v, &x)); 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566 567 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 568 { 569 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 570 571 PetscFunctionBegin; 572 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 573 PetscCall(MatStashDestroy_Private(&mat->stash)); 574 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 575 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 576 PetscCall(MatDestroy(&mdn->A)); 577 PetscCall(VecDestroy(&mdn->lvec)); 578 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 579 PetscCall(VecDestroy(&mdn->cvec)); 580 PetscCall(MatDestroy(&mdn->cmat)); 581 582 PetscCall(PetscFree(mat->data)); 583 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 584 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 598 #if defined(PETSC_HAVE_ELEMENTAL) 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 600 #endif 601 #if defined(PETSC_HAVE_SCALAPACK) 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 603 #endif 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 605 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 607 #if defined(PETSC_HAVE_CUDA) 608 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 626 #endif 627 #if defined(PETSC_HAVE_HIP) 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 646 #endif 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 651 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 657 658 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 #include <petscdraw.h> 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 666 PetscMPIInt rank; 667 PetscViewerType vtype; 668 PetscBool iascii, isdraw; 669 PetscViewer sviewer; 670 PetscViewerFormat format; 671 672 PetscFunctionBegin; 673 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 674 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 675 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 676 if (iascii) { 677 PetscCall(PetscViewerGetType(viewer, &vtype)); 678 PetscCall(PetscViewerGetFormat(viewer, &format)); 679 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 680 MatInfo info; 681 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 682 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 683 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 684 (PetscInt)info.memory)); 685 PetscCall(PetscViewerFlush(viewer)); 686 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 687 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 688 PetscFunctionReturn(PETSC_SUCCESS); 689 } else if (format == PETSC_VIEWER_ASCII_INFO) { 690 PetscFunctionReturn(PETSC_SUCCESS); 691 } 692 } else if (isdraw) { 693 PetscDraw draw; 694 PetscBool isnull; 695 696 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 697 PetscCall(PetscDrawIsNull(draw, &isnull)); 698 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 699 } 700 701 { 702 /* assemble the entire matrix onto first processor. */ 703 Mat A; 704 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 705 PetscInt *cols; 706 PetscScalar *vals; 707 708 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 709 if (rank == 0) { 710 PetscCall(MatSetSizes(A, M, N, M, N)); 711 } else { 712 PetscCall(MatSetSizes(A, 0, 0, M, N)); 713 } 714 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 715 PetscCall(MatSetType(A, MATMPIDENSE)); 716 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 717 718 /* Copy the matrix ... This isn't the most efficient means, 719 but it's quick for now */ 720 A->insertmode = INSERT_VALUES; 721 722 row = mat->rmap->rstart; 723 m = mdn->A->rmap->n; 724 for (i = 0; i < m; i++) { 725 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 726 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 727 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 728 row++; 729 } 730 731 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 732 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 733 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 734 if (rank == 0) { 735 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 736 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 737 } 738 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 739 PetscCall(MatDestroy(&A)); 740 } 741 PetscFunctionReturn(PETSC_SUCCESS); 742 } 743 744 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 745 { 746 PetscBool iascii, isbinary, isdraw, issocket; 747 748 PetscFunctionBegin; 749 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 750 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 751 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 752 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 753 754 if (iascii || issocket || isdraw) { 755 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 756 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 761 { 762 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 763 Mat mdn = mat->A; 764 PetscLogDouble isend[5], irecv[5]; 765 766 PetscFunctionBegin; 767 info->block_size = 1.0; 768 769 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 770 771 isend[0] = info->nz_used; 772 isend[1] = info->nz_allocated; 773 isend[2] = info->nz_unneeded; 774 isend[3] = info->memory; 775 isend[4] = info->mallocs; 776 if (flag == MAT_LOCAL) { 777 info->nz_used = isend[0]; 778 info->nz_allocated = isend[1]; 779 info->nz_unneeded = isend[2]; 780 info->memory = isend[3]; 781 info->mallocs = isend[4]; 782 } else if (flag == MAT_GLOBAL_MAX) { 783 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 784 785 info->nz_used = irecv[0]; 786 info->nz_allocated = irecv[1]; 787 info->nz_unneeded = irecv[2]; 788 info->memory = irecv[3]; 789 info->mallocs = irecv[4]; 790 } else if (flag == MAT_GLOBAL_SUM) { 791 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 792 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } 799 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 800 info->fill_ratio_needed = 0; 801 info->factor_mallocs = 0; 802 PetscFunctionReturn(PETSC_SUCCESS); 803 } 804 805 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 806 { 807 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 808 809 PetscFunctionBegin; 810 switch (op) { 811 case MAT_NEW_NONZERO_LOCATIONS: 812 case MAT_NEW_NONZERO_LOCATION_ERR: 813 case MAT_NEW_NONZERO_ALLOCATION_ERR: 814 MatCheckPreallocated(A, 1); 815 PetscCall(MatSetOption(a->A, op, flg)); 816 break; 817 case MAT_ROW_ORIENTED: 818 MatCheckPreallocated(A, 1); 819 a->roworiented = flg; 820 PetscCall(MatSetOption(a->A, op, flg)); 821 break; 822 case MAT_FORCE_DIAGONAL_ENTRIES: 823 case MAT_KEEP_NONZERO_PATTERN: 824 case MAT_USE_HASH_TABLE: 825 case MAT_SORTED_FULL: 826 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 827 break; 828 case MAT_IGNORE_OFF_PROC_ENTRIES: 829 a->donotstash = flg; 830 break; 831 case MAT_SYMMETRIC: 832 case MAT_STRUCTURALLY_SYMMETRIC: 833 case MAT_HERMITIAN: 834 case MAT_SYMMETRY_ETERNAL: 835 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 836 case MAT_SPD: 837 case MAT_IGNORE_LOWER_TRIANGULAR: 838 case MAT_IGNORE_ZERO_ENTRIES: 839 case MAT_SPD_ETERNAL: 840 /* if the diagonal matrix is square it inherits some of the properties above */ 841 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 842 break; 843 default: 844 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 845 } 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 850 { 851 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 852 const PetscScalar *l; 853 PetscScalar x, *v, *vv, *r; 854 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 855 856 PetscFunctionBegin; 857 PetscCall(MatDenseGetArray(mdn->A, &vv)); 858 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 859 PetscCall(MatGetLocalSize(A, &s2, &s3)); 860 if (ll) { 861 PetscCall(VecGetLocalSize(ll, &s2a)); 862 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 863 PetscCall(VecGetArrayRead(ll, &l)); 864 for (i = 0; i < m; i++) { 865 x = l[i]; 866 v = vv + i; 867 for (j = 0; j < n; j++) { 868 (*v) *= x; 869 v += lda; 870 } 871 } 872 PetscCall(VecRestoreArrayRead(ll, &l)); 873 PetscCall(PetscLogFlops(1.0 * n * m)); 874 } 875 if (rr) { 876 const PetscScalar *ar; 877 878 PetscCall(VecGetLocalSize(rr, &s3a)); 879 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 880 PetscCall(VecGetArrayRead(rr, &ar)); 881 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 882 PetscCall(VecGetArray(mdn->lvec, &r)); 883 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 884 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 885 PetscCall(VecRestoreArrayRead(rr, &ar)); 886 for (i = 0; i < n; i++) { 887 x = r[i]; 888 v = vv + i * lda; 889 for (j = 0; j < m; j++) (*v++) *= x; 890 } 891 PetscCall(VecRestoreArray(mdn->lvec, &r)); 892 PetscCall(PetscLogFlops(1.0 * n * m)); 893 } 894 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 899 { 900 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 901 PetscInt i, j; 902 PetscMPIInt size; 903 PetscReal sum = 0.0; 904 const PetscScalar *av, *v; 905 906 PetscFunctionBegin; 907 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 908 v = av; 909 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 910 if (size == 1) { 911 PetscCall(MatNorm(mdn->A, type, nrm)); 912 } else { 913 if (type == NORM_FROBENIUS) { 914 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 915 sum += PetscRealPart(PetscConj(*v) * (*v)); 916 v++; 917 } 918 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 919 *nrm = PetscSqrtReal(*nrm); 920 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 921 } else if (type == NORM_1) { 922 PetscReal *tmp, *tmp2; 923 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 924 *nrm = 0.0; 925 v = av; 926 for (j = 0; j < mdn->A->cmap->n; j++) { 927 for (i = 0; i < mdn->A->rmap->n; i++) { 928 tmp[j] += PetscAbsScalar(*v); 929 v++; 930 } 931 } 932 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 933 for (j = 0; j < A->cmap->N; j++) { 934 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 935 } 936 PetscCall(PetscFree2(tmp, tmp2)); 937 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 938 } else if (type == NORM_INFINITY) { /* max row norm */ 939 PetscReal ntemp; 940 PetscCall(MatNorm(mdn->A, type, &ntemp)); 941 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 942 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 943 } 944 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 945 PetscFunctionReturn(PETSC_SUCCESS); 946 } 947 948 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 949 { 950 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 951 Mat B; 952 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 953 PetscInt j, i, lda; 954 PetscScalar *v; 955 956 PetscFunctionBegin; 957 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 958 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 959 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 960 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 961 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 962 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 963 } else B = *matout; 964 965 m = a->A->rmap->n; 966 n = a->A->cmap->n; 967 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 968 PetscCall(MatDenseGetLDA(a->A, &lda)); 969 PetscCall(PetscMalloc1(m, &rwork)); 970 for (i = 0; i < m; i++) rwork[i] = rstart + i; 971 for (j = 0; j < n; j++) { 972 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 973 v += lda; 974 } 975 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 976 PetscCall(PetscFree(rwork)); 977 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 978 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 979 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 980 *matout = B; 981 } else { 982 PetscCall(MatHeaderMerge(A, &B)); 983 } 984 PetscFunctionReturn(PETSC_SUCCESS); 985 } 986 987 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 988 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 989 990 static PetscErrorCode MatSetUp_MPIDense(Mat A) 991 { 992 PetscFunctionBegin; 993 PetscCall(PetscLayoutSetUp(A->rmap)); 994 PetscCall(PetscLayoutSetUp(A->cmap)); 995 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1000 { 1001 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1002 1003 PetscFunctionBegin; 1004 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1009 { 1010 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1011 1012 PetscFunctionBegin; 1013 PetscCall(MatConjugate(a->A)); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1018 { 1019 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1020 1021 PetscFunctionBegin; 1022 PetscCall(MatRealPart(a->A)); 1023 PetscFunctionReturn(PETSC_SUCCESS); 1024 } 1025 1026 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1027 { 1028 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1029 1030 PetscFunctionBegin; 1031 PetscCall(MatImaginaryPart(a->A)); 1032 PetscFunctionReturn(PETSC_SUCCESS); 1033 } 1034 1035 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1036 { 1037 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1038 1039 PetscFunctionBegin; 1040 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1041 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1042 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1047 1048 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1049 { 1050 PetscInt i, m, n; 1051 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1052 PetscReal *work; 1053 1054 PetscFunctionBegin; 1055 PetscCall(MatGetSize(A, &m, &n)); 1056 PetscCall(PetscMalloc1(n, &work)); 1057 if (type == REDUCTION_MEAN_REALPART) { 1058 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1059 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1060 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1061 } else { 1062 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1063 } 1064 if (type == NORM_2) { 1065 for (i = 0; i < n; i++) work[i] *= work[i]; 1066 } 1067 if (type == NORM_INFINITY) { 1068 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1069 } else { 1070 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1071 } 1072 PetscCall(PetscFree(work)); 1073 if (type == NORM_2) { 1074 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1075 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1076 for (i = 0; i < n; i++) reductions[i] /= m; 1077 } 1078 PetscFunctionReturn(PETSC_SUCCESS); 1079 } 1080 1081 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1082 { 1083 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1084 1085 PetscFunctionBegin; 1086 PetscCall(MatSetRandom(d->A, rctx)); 1087 #if defined(PETSC_HAVE_DEVICE) 1088 x->offloadmask = d->A->offloadmask; 1089 #endif 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1094 { 1095 PetscFunctionBegin; 1096 *missing = PETSC_FALSE; 1097 PetscFunctionReturn(PETSC_SUCCESS); 1098 } 1099 1100 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1101 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1102 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1103 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1104 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1105 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1106 1107 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1108 MatGetRow_MPIDense, 1109 MatRestoreRow_MPIDense, 1110 MatMult_MPIDense, 1111 /* 4*/ MatMultAdd_MPIDense, 1112 MatMultTranspose_MPIDense, 1113 MatMultTransposeAdd_MPIDense, 1114 NULL, 1115 NULL, 1116 NULL, 1117 /* 10*/ NULL, 1118 NULL, 1119 NULL, 1120 NULL, 1121 MatTranspose_MPIDense, 1122 /* 15*/ MatGetInfo_MPIDense, 1123 MatEqual_MPIDense, 1124 MatGetDiagonal_MPIDense, 1125 MatDiagonalScale_MPIDense, 1126 MatNorm_MPIDense, 1127 /* 20*/ MatAssemblyBegin_MPIDense, 1128 MatAssemblyEnd_MPIDense, 1129 MatSetOption_MPIDense, 1130 MatZeroEntries_MPIDense, 1131 /* 24*/ MatZeroRows_MPIDense, 1132 NULL, 1133 NULL, 1134 NULL, 1135 NULL, 1136 /* 29*/ MatSetUp_MPIDense, 1137 NULL, 1138 NULL, 1139 MatGetDiagonalBlock_MPIDense, 1140 NULL, 1141 /* 34*/ MatDuplicate_MPIDense, 1142 NULL, 1143 NULL, 1144 NULL, 1145 NULL, 1146 /* 39*/ MatAXPY_MPIDense, 1147 MatCreateSubMatrices_MPIDense, 1148 NULL, 1149 MatGetValues_MPIDense, 1150 MatCopy_MPIDense, 1151 /* 44*/ NULL, 1152 MatScale_MPIDense, 1153 MatShift_MPIDense, 1154 NULL, 1155 NULL, 1156 /* 49*/ MatSetRandom_MPIDense, 1157 NULL, 1158 NULL, 1159 NULL, 1160 NULL, 1161 /* 54*/ NULL, 1162 NULL, 1163 NULL, 1164 NULL, 1165 NULL, 1166 /* 59*/ MatCreateSubMatrix_MPIDense, 1167 MatDestroy_MPIDense, 1168 MatView_MPIDense, 1169 NULL, 1170 NULL, 1171 /* 64*/ NULL, 1172 NULL, 1173 NULL, 1174 NULL, 1175 NULL, 1176 /* 69*/ NULL, 1177 NULL, 1178 NULL, 1179 NULL, 1180 NULL, 1181 /* 74*/ NULL, 1182 NULL, 1183 NULL, 1184 NULL, 1185 NULL, 1186 /* 79*/ NULL, 1187 NULL, 1188 NULL, 1189 NULL, 1190 /* 83*/ MatLoad_MPIDense, 1191 NULL, 1192 NULL, 1193 NULL, 1194 NULL, 1195 NULL, 1196 /* 89*/ NULL, 1197 NULL, 1198 NULL, 1199 NULL, 1200 NULL, 1201 /* 94*/ NULL, 1202 NULL, 1203 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1204 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1205 NULL, 1206 /* 99*/ MatProductSetFromOptions_MPIDense, 1207 NULL, 1208 NULL, 1209 MatConjugate_MPIDense, 1210 NULL, 1211 /*104*/ NULL, 1212 MatRealPart_MPIDense, 1213 MatImaginaryPart_MPIDense, 1214 NULL, 1215 NULL, 1216 /*109*/ NULL, 1217 NULL, 1218 NULL, 1219 MatGetColumnVector_MPIDense, 1220 MatMissingDiagonal_MPIDense, 1221 /*114*/ NULL, 1222 NULL, 1223 NULL, 1224 NULL, 1225 NULL, 1226 /*119*/ NULL, 1227 NULL, 1228 NULL, 1229 NULL, 1230 NULL, 1231 /*124*/ NULL, 1232 MatGetColumnReductions_MPIDense, 1233 NULL, 1234 NULL, 1235 NULL, 1236 /*129*/ NULL, 1237 NULL, 1238 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1239 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1240 NULL, 1241 /*134*/ NULL, 1242 NULL, 1243 NULL, 1244 NULL, 1245 NULL, 1246 /*139*/ NULL, 1247 NULL, 1248 NULL, 1249 NULL, 1250 NULL, 1251 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1252 /*145*/ NULL, 1253 NULL, 1254 NULL, 1255 NULL, 1256 NULL, 1257 /*150*/ NULL, 1258 NULL}; 1259 1260 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1261 { 1262 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1263 MatType mtype = MATSEQDENSE; 1264 1265 PetscFunctionBegin; 1266 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1267 PetscCall(PetscLayoutSetUp(mat->rmap)); 1268 PetscCall(PetscLayoutSetUp(mat->cmap)); 1269 if (!a->A) { 1270 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1271 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1272 } 1273 #if defined(PETSC_HAVE_CUDA) 1274 PetscBool iscuda; 1275 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1276 if (iscuda) mtype = MATSEQDENSECUDA; 1277 #endif 1278 #if defined(PETSC_HAVE_HIP) 1279 PetscBool iship; 1280 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1281 if (iship) mtype = MATSEQDENSEHIP; 1282 #endif 1283 PetscCall(MatSetType(a->A, mtype)); 1284 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1285 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1286 mat->offloadmask = a->A->offloadmask; 1287 #endif 1288 mat->preallocated = PETSC_TRUE; 1289 mat->assembled = PETSC_TRUE; 1290 PetscFunctionReturn(PETSC_SUCCESS); 1291 } 1292 1293 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1294 { 1295 Mat B, C; 1296 1297 PetscFunctionBegin; 1298 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1299 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1300 PetscCall(MatDestroy(&C)); 1301 if (reuse == MAT_REUSE_MATRIX) { 1302 C = *newmat; 1303 } else C = NULL; 1304 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1305 PetscCall(MatDestroy(&B)); 1306 if (reuse == MAT_INPLACE_MATRIX) { 1307 PetscCall(MatHeaderReplace(A, &C)); 1308 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1309 PetscFunctionReturn(PETSC_SUCCESS); 1310 } 1311 1312 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1313 { 1314 Mat B, C; 1315 1316 PetscFunctionBegin; 1317 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1318 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1319 if (reuse == MAT_REUSE_MATRIX) { 1320 C = *newmat; 1321 } else C = NULL; 1322 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1323 PetscCall(MatDestroy(&B)); 1324 if (reuse == MAT_INPLACE_MATRIX) { 1325 PetscCall(MatHeaderReplace(A, &C)); 1326 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1327 PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 #if defined(PETSC_HAVE_ELEMENTAL) 1331 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1332 { 1333 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1334 Mat mat_elemental; 1335 PetscScalar *v; 1336 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1337 1338 PetscFunctionBegin; 1339 if (reuse == MAT_REUSE_MATRIX) { 1340 mat_elemental = *newmat; 1341 PetscCall(MatZeroEntries(*newmat)); 1342 } else { 1343 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1344 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1345 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1346 PetscCall(MatSetUp(mat_elemental)); 1347 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1348 } 1349 1350 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1351 for (i = 0; i < N; i++) cols[i] = i; 1352 for (i = 0; i < m; i++) rows[i] = rstart + i; 1353 1354 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1355 PetscCall(MatDenseGetArray(A, &v)); 1356 PetscCall(MatDenseGetLDA(a->A, &lda)); 1357 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1358 else { 1359 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1360 } 1361 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1362 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1363 PetscCall(MatDenseRestoreArray(A, &v)); 1364 PetscCall(PetscFree2(rows, cols)); 1365 1366 if (reuse == MAT_INPLACE_MATRIX) { 1367 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1368 } else { 1369 *newmat = mat_elemental; 1370 } 1371 PetscFunctionReturn(PETSC_SUCCESS); 1372 } 1373 #endif 1374 1375 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1376 { 1377 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1378 1379 PetscFunctionBegin; 1380 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1381 PetscFunctionReturn(PETSC_SUCCESS); 1382 } 1383 1384 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1385 { 1386 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1387 1388 PetscFunctionBegin; 1389 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1390 PetscFunctionReturn(PETSC_SUCCESS); 1391 } 1392 1393 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1394 { 1395 Mat_MPIDense *mat; 1396 PetscInt m, nloc, N; 1397 1398 PetscFunctionBegin; 1399 PetscCall(MatGetSize(inmat, &m, &N)); 1400 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1401 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1402 PetscInt sum; 1403 1404 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1405 /* Check sum(n) = N */ 1406 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1407 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1408 1409 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1410 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1411 } 1412 1413 /* numeric phase */ 1414 mat = (Mat_MPIDense *)(*outmat)->data; 1415 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1416 PetscFunctionReturn(PETSC_SUCCESS); 1417 } 1418 1419 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1420 { 1421 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1422 PetscInt lda; 1423 1424 PetscFunctionBegin; 1425 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1426 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1427 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1428 a->vecinuse = col + 1; 1429 PetscCall(MatDenseGetLDA(a->A, &lda)); 1430 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1431 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1432 *v = a->cvec; 1433 PetscFunctionReturn(PETSC_SUCCESS); 1434 } 1435 1436 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1437 { 1438 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1439 1440 PetscFunctionBegin; 1441 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1442 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1443 a->vecinuse = 0; 1444 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1445 PetscCall(VecResetArray(a->cvec)); 1446 if (v) *v = NULL; 1447 PetscFunctionReturn(PETSC_SUCCESS); 1448 } 1449 1450 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1451 { 1452 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1453 PetscInt lda; 1454 1455 PetscFunctionBegin; 1456 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1457 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1458 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1459 a->vecinuse = col + 1; 1460 PetscCall(MatDenseGetLDA(a->A, &lda)); 1461 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1462 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1463 PetscCall(VecLockReadPush(a->cvec)); 1464 *v = a->cvec; 1465 PetscFunctionReturn(PETSC_SUCCESS); 1466 } 1467 1468 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1469 { 1470 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1471 1472 PetscFunctionBegin; 1473 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1474 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1475 a->vecinuse = 0; 1476 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1477 PetscCall(VecLockReadPop(a->cvec)); 1478 PetscCall(VecResetArray(a->cvec)); 1479 if (v) *v = NULL; 1480 PetscFunctionReturn(PETSC_SUCCESS); 1481 } 1482 1483 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1484 { 1485 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1486 PetscInt lda; 1487 1488 PetscFunctionBegin; 1489 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1490 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1491 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1492 a->vecinuse = col + 1; 1493 PetscCall(MatDenseGetLDA(a->A, &lda)); 1494 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1495 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1496 *v = a->cvec; 1497 PetscFunctionReturn(PETSC_SUCCESS); 1498 } 1499 1500 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1501 { 1502 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1503 1504 PetscFunctionBegin; 1505 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1506 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1507 a->vecinuse = 0; 1508 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1509 PetscCall(VecResetArray(a->cvec)); 1510 if (v) *v = NULL; 1511 PetscFunctionReturn(PETSC_SUCCESS); 1512 } 1513 1514 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1515 { 1516 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1517 Mat_MPIDense *c; 1518 MPI_Comm comm; 1519 PetscInt pbegin, pend; 1520 1521 PetscFunctionBegin; 1522 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1523 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1524 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1525 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1526 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1527 if (!a->cmat) { 1528 PetscCall(MatCreate(comm, &a->cmat)); 1529 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1530 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1531 else { 1532 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1533 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1534 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1535 } 1536 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1537 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1538 } else { 1539 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1540 if (same && a->cmat->rmap->N != A->rmap->N) { 1541 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1542 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1543 } 1544 if (!same) { 1545 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1546 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1547 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1548 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1549 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1550 } 1551 if (cend - cbegin != a->cmat->cmap->N) { 1552 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1553 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1554 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1555 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1556 } 1557 } 1558 c = (Mat_MPIDense *)a->cmat->data; 1559 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1560 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1561 1562 a->cmat->preallocated = PETSC_TRUE; 1563 a->cmat->assembled = PETSC_TRUE; 1564 #if defined(PETSC_HAVE_DEVICE) 1565 a->cmat->offloadmask = c->A->offloadmask; 1566 #endif 1567 a->matinuse = cbegin + 1; 1568 *v = a->cmat; 1569 PetscFunctionReturn(PETSC_SUCCESS); 1570 } 1571 1572 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1573 { 1574 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1575 Mat_MPIDense *c; 1576 1577 PetscFunctionBegin; 1578 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1579 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1580 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1581 a->matinuse = 0; 1582 c = (Mat_MPIDense *)a->cmat->data; 1583 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1584 if (v) *v = NULL; 1585 #if defined(PETSC_HAVE_DEVICE) 1586 A->offloadmask = a->A->offloadmask; 1587 #endif 1588 PetscFunctionReturn(PETSC_SUCCESS); 1589 } 1590 1591 /*MC 1592 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1593 1594 Options Database Key: 1595 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1596 1597 Level: beginner 1598 1599 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1600 M*/ 1601 PetscErrorCode MatCreate_MPIDense(Mat mat) 1602 { 1603 Mat_MPIDense *a; 1604 1605 PetscFunctionBegin; 1606 PetscCall(PetscNew(&a)); 1607 mat->data = (void *)a; 1608 mat->ops[0] = MatOps_Values; 1609 1610 mat->insertmode = NOT_SET_VALUES; 1611 1612 /* build cache for off array entries formed */ 1613 a->donotstash = PETSC_FALSE; 1614 1615 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1616 1617 /* stuff used for matrix vector multiply */ 1618 a->lvec = NULL; 1619 a->Mvctx = NULL; 1620 a->roworiented = PETSC_TRUE; 1621 1622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1632 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1643 #if defined(PETSC_HAVE_ELEMENTAL) 1644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1645 #endif 1646 #if defined(PETSC_HAVE_SCALAPACK) 1647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1648 #endif 1649 #if defined(PETSC_HAVE_CUDA) 1650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1651 #endif 1652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1655 #if defined(PETSC_HAVE_CUDA) 1656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1658 #endif 1659 #if defined(PETSC_HAVE_HIP) 1660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1663 #endif 1664 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1666 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1667 PetscFunctionReturn(PETSC_SUCCESS); 1668 } 1669 1670 /*MC 1671 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1672 1673 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1674 and `MATMPIDENSE` otherwise. 1675 1676 Options Database Key: 1677 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1678 1679 Level: beginner 1680 1681 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1682 M*/ 1683 1684 /*@C 1685 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1686 1687 Collective 1688 1689 Input Parameters: 1690 + B - the matrix 1691 - data - optional location of matrix data. Set to `NULL` for PETSc 1692 to control all matrix memory allocation. 1693 1694 Level: intermediate 1695 1696 Notes: 1697 The dense format is fully compatible with standard Fortran 1698 storage by columns. 1699 1700 The data input variable is intended primarily for Fortran programmers 1701 who wish to allocate their own matrix memory space. Most users should 1702 set `data` to `NULL`. 1703 1704 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1705 @*/ 1706 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1707 { 1708 PetscFunctionBegin; 1709 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1710 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 /*@ 1715 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1716 array provided by the user. This is useful to avoid copying an array 1717 into a matrix 1718 1719 Not Collective 1720 1721 Input Parameters: 1722 + mat - the matrix 1723 - array - the array in column major order 1724 1725 Level: developer 1726 1727 Note: 1728 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1729 freed when the matrix is destroyed. 1730 1731 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1732 `MatDenseReplaceArray()` 1733 @*/ 1734 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1735 { 1736 PetscFunctionBegin; 1737 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1738 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1739 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1740 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1741 mat->offloadmask = PETSC_OFFLOAD_CPU; 1742 #endif 1743 PetscFunctionReturn(PETSC_SUCCESS); 1744 } 1745 1746 /*@ 1747 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1748 1749 Not Collective 1750 1751 Input Parameter: 1752 . mat - the matrix 1753 1754 Level: developer 1755 1756 Note: 1757 You can only call this after a call to `MatDensePlaceArray()` 1758 1759 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1760 @*/ 1761 PetscErrorCode MatDenseResetArray(Mat mat) 1762 { 1763 PetscFunctionBegin; 1764 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1765 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1766 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1767 PetscFunctionReturn(PETSC_SUCCESS); 1768 } 1769 1770 /*@ 1771 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1772 array provided by the user. This is useful to avoid copying an array 1773 into a matrix 1774 1775 Not Collective 1776 1777 Input Parameters: 1778 + mat - the matrix 1779 - array - the array in column major order 1780 1781 Level: developer 1782 1783 Note: 1784 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1785 freed by the user. It will be freed when the matrix is destroyed. 1786 1787 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1788 @*/ 1789 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1790 { 1791 PetscFunctionBegin; 1792 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1793 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1794 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1795 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1796 mat->offloadmask = PETSC_OFFLOAD_CPU; 1797 #endif 1798 PetscFunctionReturn(PETSC_SUCCESS); 1799 } 1800 1801 /*@C 1802 MatCreateDense - Creates a matrix in `MATDENSE` format. 1803 1804 Collective 1805 1806 Input Parameters: 1807 + comm - MPI communicator 1808 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1809 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1810 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1811 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1812 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1813 to control all matrix memory allocation. 1814 1815 Output Parameter: 1816 . A - the matrix 1817 1818 Level: intermediate 1819 1820 Notes: 1821 The dense format is fully compatible with standard Fortran 1822 storage by columns. 1823 1824 Although local portions of the matrix are stored in column-major 1825 order, the matrix is partitioned across MPI ranks by row. 1826 1827 The data input variable is intended primarily for Fortran programmers 1828 who wish to allocate their own matrix memory space. Most users should 1829 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1830 1831 The user MUST specify either the local or global matrix dimensions 1832 (possibly both). 1833 1834 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1835 @*/ 1836 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1837 { 1838 PetscFunctionBegin; 1839 PetscCall(MatCreate(comm, A)); 1840 PetscCall(MatSetSizes(*A, m, n, M, N)); 1841 PetscCall(MatSetType(*A, MATDENSE)); 1842 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1843 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1844 PetscFunctionReturn(PETSC_SUCCESS); 1845 } 1846 1847 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1848 { 1849 Mat mat; 1850 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1851 1852 PetscFunctionBegin; 1853 *newmat = NULL; 1854 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1855 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1856 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1857 a = (Mat_MPIDense *)mat->data; 1858 1859 mat->factortype = A->factortype; 1860 mat->assembled = PETSC_TRUE; 1861 mat->preallocated = PETSC_TRUE; 1862 1863 mat->insertmode = NOT_SET_VALUES; 1864 a->donotstash = oldmat->donotstash; 1865 1866 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1867 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1868 1869 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1870 1871 *newmat = mat; 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 1875 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1876 { 1877 PetscBool isbinary; 1878 #if defined(PETSC_HAVE_HDF5) 1879 PetscBool ishdf5; 1880 #endif 1881 1882 PetscFunctionBegin; 1883 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1884 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1885 /* force binary viewer to load .info file if it has not yet done so */ 1886 PetscCall(PetscViewerSetUp(viewer)); 1887 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1888 #if defined(PETSC_HAVE_HDF5) 1889 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1890 #endif 1891 if (isbinary) { 1892 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1893 #if defined(PETSC_HAVE_HDF5) 1894 } else if (ishdf5) { 1895 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1896 #endif 1897 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1898 PetscFunctionReturn(PETSC_SUCCESS); 1899 } 1900 1901 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 1902 { 1903 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 1904 Mat a, b; 1905 1906 PetscFunctionBegin; 1907 a = matA->A; 1908 b = matB->A; 1909 PetscCall(MatEqual(a, b, flag)); 1910 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1911 PetscFunctionReturn(PETSC_SUCCESS); 1912 } 1913 1914 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 1915 { 1916 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 1917 1918 PetscFunctionBegin; 1919 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 1920 PetscCall(MatDestroy(&atb->atb)); 1921 PetscCall(PetscFree(atb)); 1922 PetscFunctionReturn(PETSC_SUCCESS); 1923 } 1924 1925 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 1926 { 1927 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 1928 1929 PetscFunctionBegin; 1930 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 1931 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 1932 PetscCall(PetscFree(abt)); 1933 PetscFunctionReturn(PETSC_SUCCESS); 1934 } 1935 1936 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 1937 { 1938 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 1939 Mat_TransMatMultDense *atb; 1940 MPI_Comm comm; 1941 PetscMPIInt size, *recvcounts; 1942 PetscScalar *carray, *sendbuf; 1943 const PetscScalar *atbarray; 1944 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 1945 const PetscInt *ranges; 1946 1947 PetscFunctionBegin; 1948 MatCheckProduct(C, 3); 1949 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 1950 atb = (Mat_TransMatMultDense *)C->product->data; 1951 recvcounts = atb->recvcounts; 1952 sendbuf = atb->sendbuf; 1953 1954 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1955 PetscCallMPI(MPI_Comm_size(comm, &size)); 1956 1957 /* compute atbarray = aseq^T * bseq */ 1958 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 1959 1960 PetscCall(MatGetOwnershipRanges(C, &ranges)); 1961 1962 /* arrange atbarray into sendbuf */ 1963 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 1964 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 1965 for (proc = 0, k = 0; proc < size; proc++) { 1966 for (j = 0; j < cN; j++) { 1967 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 1968 } 1969 } 1970 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 1971 1972 /* sum all atbarray to local values of C */ 1973 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 1974 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 1975 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 1976 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1977 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1978 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1979 PetscFunctionReturn(PETSC_SUCCESS); 1980 } 1981 1982 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 1983 { 1984 MPI_Comm comm; 1985 PetscMPIInt size; 1986 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 1987 Mat_TransMatMultDense *atb; 1988 PetscBool cisdense = PETSC_FALSE; 1989 PetscInt i; 1990 const PetscInt *ranges; 1991 1992 PetscFunctionBegin; 1993 MatCheckProduct(C, 4); 1994 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 1995 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1996 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1997 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 1998 } 1999 2000 /* create matrix product C */ 2001 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2002 #if defined(PETSC_HAVE_CUDA) 2003 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2004 #endif 2005 #if defined(PETSC_HAVE_HIP) 2006 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2007 #endif 2008 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2009 PetscCall(MatSetUp(C)); 2010 2011 /* create data structure for reuse C */ 2012 PetscCallMPI(MPI_Comm_size(comm, &size)); 2013 PetscCall(PetscNew(&atb)); 2014 cM = C->rmap->N; 2015 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2016 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2017 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2018 2019 C->product->data = atb; 2020 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2021 PetscFunctionReturn(PETSC_SUCCESS); 2022 } 2023 2024 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2025 { 2026 MPI_Comm comm; 2027 PetscMPIInt i, size; 2028 PetscInt maxRows, bufsiz; 2029 PetscMPIInt tag; 2030 PetscInt alg; 2031 Mat_MatTransMultDense *abt; 2032 Mat_Product *product = C->product; 2033 PetscBool flg; 2034 2035 PetscFunctionBegin; 2036 MatCheckProduct(C, 4); 2037 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2038 /* check local size of A and B */ 2039 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2040 2041 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2042 alg = flg ? 0 : 1; 2043 2044 /* setup matrix product C */ 2045 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2046 PetscCall(MatSetType(C, MATMPIDENSE)); 2047 PetscCall(MatSetUp(C)); 2048 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2049 2050 /* create data structure for reuse C */ 2051 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2052 PetscCallMPI(MPI_Comm_size(comm, &size)); 2053 PetscCall(PetscNew(&abt)); 2054 abt->tag = tag; 2055 abt->alg = alg; 2056 switch (alg) { 2057 case 1: /* alg: "cyclic" */ 2058 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2059 bufsiz = A->cmap->N * maxRows; 2060 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2061 break; 2062 default: /* alg: "allgatherv" */ 2063 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2064 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2065 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2066 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2067 break; 2068 } 2069 2070 C->product->data = abt; 2071 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2072 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2073 PetscFunctionReturn(PETSC_SUCCESS); 2074 } 2075 2076 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2077 { 2078 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2079 Mat_MatTransMultDense *abt; 2080 MPI_Comm comm; 2081 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2082 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2083 PetscInt i, cK = A->cmap->N, k, j, bn; 2084 PetscScalar _DOne = 1.0, _DZero = 0.0; 2085 const PetscScalar *av, *bv; 2086 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2087 MPI_Request reqs[2]; 2088 const PetscInt *ranges; 2089 2090 PetscFunctionBegin; 2091 MatCheckProduct(C, 3); 2092 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2093 abt = (Mat_MatTransMultDense *)C->product->data; 2094 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2095 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2096 PetscCallMPI(MPI_Comm_size(comm, &size)); 2097 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2098 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2099 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2100 PetscCall(MatDenseGetLDA(a->A, &i)); 2101 PetscCall(PetscBLASIntCast(i, &alda)); 2102 PetscCall(MatDenseGetLDA(b->A, &i)); 2103 PetscCall(PetscBLASIntCast(i, &blda)); 2104 PetscCall(MatDenseGetLDA(c->A, &i)); 2105 PetscCall(PetscBLASIntCast(i, &clda)); 2106 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2107 bn = B->rmap->n; 2108 if (blda == bn) { 2109 sendbuf = (PetscScalar *)bv; 2110 } else { 2111 sendbuf = abt->buf[0]; 2112 for (k = 0, i = 0; i < cK; i++) { 2113 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2114 } 2115 } 2116 if (size > 1) { 2117 sendto = (rank + size - 1) % size; 2118 recvfrom = (rank + size + 1) % size; 2119 } else { 2120 sendto = recvfrom = 0; 2121 } 2122 PetscCall(PetscBLASIntCast(cK, &ck)); 2123 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2124 recvisfrom = rank; 2125 for (i = 0; i < size; i++) { 2126 /* we have finished receiving in sending, bufs can be read/modified */ 2127 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2128 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2129 2130 if (nextrecvisfrom != rank) { 2131 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2132 sendsiz = cK * bn; 2133 recvsiz = cK * nextbn; 2134 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2135 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2136 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2137 } 2138 2139 /* local aseq * sendbuf^T */ 2140 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2141 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2142 2143 if (nextrecvisfrom != rank) { 2144 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2145 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2146 } 2147 bn = nextbn; 2148 recvisfrom = nextrecvisfrom; 2149 sendbuf = recvbuf; 2150 } 2151 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2152 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2153 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2154 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2155 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2156 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2161 { 2162 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2163 Mat_MatTransMultDense *abt; 2164 MPI_Comm comm; 2165 PetscMPIInt size; 2166 PetscScalar *cv, *sendbuf, *recvbuf; 2167 const PetscScalar *av, *bv; 2168 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2169 PetscScalar _DOne = 1.0, _DZero = 0.0; 2170 PetscBLASInt cm, cn, ck, alda, clda; 2171 2172 PetscFunctionBegin; 2173 MatCheckProduct(C, 3); 2174 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2175 abt = (Mat_MatTransMultDense *)C->product->data; 2176 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2177 PetscCallMPI(MPI_Comm_size(comm, &size)); 2178 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2179 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2180 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2181 PetscCall(MatDenseGetLDA(a->A, &i)); 2182 PetscCall(PetscBLASIntCast(i, &alda)); 2183 PetscCall(MatDenseGetLDA(b->A, &blda)); 2184 PetscCall(MatDenseGetLDA(c->A, &i)); 2185 PetscCall(PetscBLASIntCast(i, &clda)); 2186 /* copy transpose of B into buf[0] */ 2187 bn = B->rmap->n; 2188 sendbuf = abt->buf[0]; 2189 recvbuf = abt->buf[1]; 2190 for (k = 0, j = 0; j < bn; j++) { 2191 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2192 } 2193 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2194 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2195 PetscCall(PetscBLASIntCast(cK, &ck)); 2196 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2197 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2198 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2199 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2200 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2201 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2202 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2203 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2204 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2205 PetscFunctionReturn(PETSC_SUCCESS); 2206 } 2207 2208 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2209 { 2210 Mat_MatTransMultDense *abt; 2211 2212 PetscFunctionBegin; 2213 MatCheckProduct(C, 3); 2214 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2215 abt = (Mat_MatTransMultDense *)C->product->data; 2216 switch (abt->alg) { 2217 case 1: 2218 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2219 break; 2220 default: 2221 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2222 break; 2223 } 2224 PetscFunctionReturn(PETSC_SUCCESS); 2225 } 2226 2227 #if defined(PETSC_HAVE_ELEMENTAL) 2228 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2229 { 2230 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2231 2232 PetscFunctionBegin; 2233 PetscCall(MatDestroy(&ab->Ce)); 2234 PetscCall(MatDestroy(&ab->Ae)); 2235 PetscCall(MatDestroy(&ab->Be)); 2236 PetscCall(PetscFree(ab)); 2237 PetscFunctionReturn(PETSC_SUCCESS); 2238 } 2239 2240 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2241 { 2242 Mat_MatMultDense *ab; 2243 2244 PetscFunctionBegin; 2245 MatCheckProduct(C, 3); 2246 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2247 ab = (Mat_MatMultDense *)C->product->data; 2248 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2249 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2250 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2251 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } 2254 2255 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2256 { 2257 Mat Ae, Be, Ce; 2258 Mat_MatMultDense *ab; 2259 2260 PetscFunctionBegin; 2261 MatCheckProduct(C, 4); 2262 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2263 /* check local size of A and B */ 2264 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2265 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2266 } 2267 2268 /* create elemental matrices Ae and Be */ 2269 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2270 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2271 PetscCall(MatSetType(Ae, MATELEMENTAL)); 2272 PetscCall(MatSetUp(Ae)); 2273 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2274 2275 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2276 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2277 PetscCall(MatSetType(Be, MATELEMENTAL)); 2278 PetscCall(MatSetUp(Be)); 2279 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2280 2281 /* compute symbolic Ce = Ae*Be */ 2282 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 2283 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 2284 2285 /* setup C */ 2286 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2287 PetscCall(MatSetType(C, MATDENSE)); 2288 PetscCall(MatSetUp(C)); 2289 2290 /* create data structure for reuse Cdense */ 2291 PetscCall(PetscNew(&ab)); 2292 ab->Ae = Ae; 2293 ab->Be = Be; 2294 ab->Ce = Ce; 2295 2296 C->product->data = ab; 2297 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2298 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2299 PetscFunctionReturn(PETSC_SUCCESS); 2300 } 2301 #endif 2302 2303 #if defined(PETSC_HAVE_ELEMENTAL) 2304 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2305 { 2306 PetscFunctionBegin; 2307 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2308 C->ops->productsymbolic = MatProductSymbolic_AB; 2309 PetscFunctionReturn(PETSC_SUCCESS); 2310 } 2311 #endif 2312 2313 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2314 { 2315 Mat_Product *product = C->product; 2316 Mat A = product->A, B = product->B; 2317 2318 PetscFunctionBegin; 2319 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2320 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2321 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2322 C->ops->productsymbolic = MatProductSymbolic_AtB; 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2327 { 2328 Mat_Product *product = C->product; 2329 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2330 PetscInt alg, nalg = 2; 2331 PetscBool flg = PETSC_FALSE; 2332 2333 PetscFunctionBegin; 2334 /* Set default algorithm */ 2335 alg = 0; /* default is allgatherv */ 2336 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2337 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2338 2339 /* Get runtime option */ 2340 if (product->api_user) { 2341 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2342 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2343 PetscOptionsEnd(); 2344 } else { 2345 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2346 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2347 PetscOptionsEnd(); 2348 } 2349 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2350 2351 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2352 C->ops->productsymbolic = MatProductSymbolic_ABt; 2353 PetscFunctionReturn(PETSC_SUCCESS); 2354 } 2355 2356 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2357 { 2358 Mat_Product *product = C->product; 2359 2360 PetscFunctionBegin; 2361 switch (product->type) { 2362 #if defined(PETSC_HAVE_ELEMENTAL) 2363 case MATPRODUCT_AB: 2364 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2365 break; 2366 #endif 2367 case MATPRODUCT_AtB: 2368 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2369 break; 2370 case MATPRODUCT_ABt: 2371 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2372 break; 2373 default: 2374 break; 2375 } 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378