1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23d71ae5a4SJacob Faibussowitsch { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31d71ae5a4SJacob Faibussowitsch { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 339f196a02SMartin Diehl PetscBool isascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 399f196a02SMartin Diehl if (isascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59d71ae5a4SJacob Faibussowitsch { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 72462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 76462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 844dfa11a4SJacob Faibussowitsch info->memory = 0; /* REVIEW ME */ 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92d71ae5a4SJacob Faibussowitsch { 93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94b12397e7SPierre Jolivet 95d24d4204SJose E. Roman PetscFunctionBegin; 96d24d4204SJose E. Roman switch (op) { 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 100d24d4204SJose E. Roman case MAT_SYMMETRIC: 101d24d4204SJose E. Roman case MAT_SORTED_FULL: 102d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 103d71ae5a4SJacob Faibussowitsch break; 104d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 105d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 106d71ae5a4SJacob Faibussowitsch break; 107d71ae5a4SJacob Faibussowitsch default: 108d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109d24d4204SJose E. Roman } 1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman 113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114d71ae5a4SJacob Faibussowitsch { 115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116d24d4204SJose E. Roman PetscInt i, j; 117d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 119d24d4204SJose E. Roman 120d24d4204SJose E. Roman PetscFunctionBegin; 121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122d24d4204SJose E. Roman for (i = 0; i < nr; i++) { 123d24d4204SJose E. Roman if (rows[i] < 0) continue; 1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125d24d4204SJose E. Roman for (j = 0; j < nc; j++) { 126d24d4204SJose E. Roman if (cols[j] < 0) continue; 1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130b12397e7SPierre Jolivet if (roworiented) { 131d24d4204SJose E. Roman switch (imode) { 132d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 133d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134d71ae5a4SJacob Faibussowitsch break; 135d71ae5a4SJacob Faibussowitsch default: 136d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137d71ae5a4SJacob Faibussowitsch break; 138b12397e7SPierre Jolivet } 139b12397e7SPierre Jolivet } else { 140b12397e7SPierre Jolivet switch (imode) { 141d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 142d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143d71ae5a4SJacob Faibussowitsch break; 144d71ae5a4SJacob Faibussowitsch default: 145d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146d71ae5a4SJacob Faibussowitsch break; 147b12397e7SPierre Jolivet } 148d24d4204SJose E. Roman } 149d24d4204SJose E. Roman } else { 15028b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 152b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153d24d4204SJose E. Roman } 154d24d4204SJose E. Roman } 155d24d4204SJose E. Roman } 1563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 157d24d4204SJose E. Roman } 158d24d4204SJose E. Roman 1595e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160d71ae5a4SJacob Faibussowitsch { 161d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0; 163d24d4204SJose E. Roman const PetscInt *ranges; 164d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman PetscFunctionBegin; 167d24d4204SJose E. Roman if (transpose) { 168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 1716497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); 172792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1749566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176d24d4204SJose E. Roman ylld = 1; 177792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 179d24d4204SJose E. Roman 180d24d4204SJose E. Roman /* allocate 2d vectors */ 181d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 1846497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 189792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 191d24d4204SJose E. Roman 192d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 193c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194d24d4204SJose E. Roman 195d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 196792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* call PBLAS subroutine */ 1995e4d33a3SBlanca Mellado Pinto if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 2005e4d33a3SBlanca Mellado Pinto else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 201d24d4204SJose E. Roman 202d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 203792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 204d24d4204SJose E. Roman 205d24d4204SJose E. Roman } else { /* non-transpose */ 206d24d4204SJose E. Roman 207d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 2089566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 2099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 210d24d4204SJose E. Roman xlld = 1; 211792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 212d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 2139566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 2149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 2156497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld)); 216792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 217d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 218d24d4204SJose E. Roman 219d24d4204SJose E. Roman /* allocate 2d vectors */ 220d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 221d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 2229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 2236497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld)); 224d24d4204SJose E. Roman 225d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 226792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 227d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 228792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 229d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 230d24d4204SJose E. Roman 231d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 232c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 233d24d4204SJose E. Roman 234d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 235792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 236d24d4204SJose E. Roman 237d24d4204SJose E. Roman /* call PBLAS subroutine */ 238792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 239d24d4204SJose E. Roman 240d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 241792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 242d24d4204SJose E. Roman } 2439566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d)); 2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 245d24d4204SJose E. Roman } 246d24d4204SJose E. Roman 247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 248d71ae5a4SJacob Faibussowitsch { 249d24d4204SJose E. Roman const PetscScalar *xarray; 250d24d4204SJose E. Roman PetscScalar *yarray; 251d24d4204SJose E. Roman 252d24d4204SJose E. Roman PetscFunctionBegin; 2539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2549566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2555e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); 2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259d24d4204SJose E. Roman } 260d24d4204SJose E. Roman 261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 262d71ae5a4SJacob Faibussowitsch { 263d24d4204SJose E. Roman const PetscScalar *xarray; 264d24d4204SJose E. Roman PetscScalar *yarray; 265d24d4204SJose E. Roman 266d24d4204SJose E. Roman PetscFunctionBegin; 2679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2695e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 273d24d4204SJose E. Roman } 274d24d4204SJose E. Roman 2755e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 2765e4d33a3SBlanca Mellado Pinto { 2775e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray; 2785e4d33a3SBlanca Mellado Pinto PetscScalar *yarray; 2795e4d33a3SBlanca Mellado Pinto 2805e4d33a3SBlanca Mellado Pinto PetscFunctionBegin; 2815e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray)); 2825e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayWrite(y, &yarray)); 2835e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); 2845e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray)); 2855e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayWrite(y, &yarray)); 2865e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 2875e4d33a3SBlanca Mellado Pinto } 2885e4d33a3SBlanca Mellado Pinto 289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290d71ae5a4SJacob Faibussowitsch { 291d24d4204SJose E. Roman const PetscScalar *xarray; 292d24d4204SJose E. Roman PetscScalar *zarray; 293d24d4204SJose E. Roman 294d24d4204SJose E. Roman PetscFunctionBegin; 2959566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2985e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); 2999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 302d24d4204SJose E. Roman } 303d24d4204SJose E. Roman 304d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 305d71ae5a4SJacob Faibussowitsch { 306d24d4204SJose E. Roman const PetscScalar *xarray; 307d24d4204SJose E. Roman PetscScalar *zarray; 308d24d4204SJose E. Roman 309d24d4204SJose E. Roman PetscFunctionBegin; 3109566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 3119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 3129566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 3135e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); 3145e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray)); 3155e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArray(z, &zarray)); 3165e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 3175e4d33a3SBlanca Mellado Pinto } 3185e4d33a3SBlanca Mellado Pinto 3195e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 3205e4d33a3SBlanca Mellado Pinto { 3215e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray; 3225e4d33a3SBlanca Mellado Pinto PetscScalar *zarray; 3235e4d33a3SBlanca Mellado Pinto 3245e4d33a3SBlanca Mellado Pinto PetscFunctionBegin; 3255e4d33a3SBlanca Mellado Pinto if (y != z) PetscCall(VecCopy(y, z)); 3265e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray)); 3275e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArray(z, &zarray)); 3285e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); 3299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 332d24d4204SJose E. Roman } 333d24d4204SJose E. Roman 334d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 335d71ae5a4SJacob Faibussowitsch { 336d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 337d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 338d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 339d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 340d24d4204SJose E. Roman PetscBLASInt one = 1; 341d24d4204SJose E. Roman 342d24d4204SJose E. Roman PetscFunctionBegin; 343792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 344d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 346d24d4204SJose E. Roman } 347d24d4204SJose E. Roman 348d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 349d71ae5a4SJacob Faibussowitsch { 350d24d4204SJose E. Roman PetscFunctionBegin; 3519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3529566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3539566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 354d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 3553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 356d24d4204SJose E. Roman } 357d24d4204SJose E. Roman 358e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 359e0d15688SBlanca Mellado Pinto { 360e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 361e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 362e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 363e0d15688SBlanca Mellado Pinto PetscScalar sone = 1.0, zero = 0.0; 364e0d15688SBlanca Mellado Pinto PetscBLASInt one = 1; 365e0d15688SBlanca Mellado Pinto 366e0d15688SBlanca Mellado Pinto PetscFunctionBegin; 367e0d15688SBlanca Mellado Pinto PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 368e0d15688SBlanca Mellado Pinto C->assembled = PETSC_TRUE; 369e0d15688SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 370e0d15688SBlanca Mellado Pinto } 371e0d15688SBlanca Mellado Pinto 372e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 373e0d15688SBlanca Mellado Pinto { 374e0d15688SBlanca Mellado Pinto PetscFunctionBegin; 375e0d15688SBlanca Mellado Pinto PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 376e0d15688SBlanca Mellado Pinto PetscCall(MatSetType(C, MATSCALAPACK)); 377e0d15688SBlanca Mellado Pinto PetscCall(MatSetUp(C)); 378e0d15688SBlanca Mellado Pinto C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK; 379e0d15688SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 380e0d15688SBlanca Mellado Pinto } 381e0d15688SBlanca Mellado Pinto 382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 383d71ae5a4SJacob Faibussowitsch { 384d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 385d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 386d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 387d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 388d24d4204SJose E. Roman PetscBLASInt one = 1; 389d24d4204SJose E. Roman 390d24d4204SJose E. Roman PetscFunctionBegin; 391792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 392d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 394d24d4204SJose E. Roman } 395d24d4204SJose E. Roman 396d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 397d71ae5a4SJacob Faibussowitsch { 398d24d4204SJose E. Roman PetscFunctionBegin; 3999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 4009566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 4019566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 402e0d15688SBlanca Mellado Pinto C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK; 4033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 404d24d4204SJose E. Roman } 405d24d4204SJose E. Roman 406d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 407d71ae5a4SJacob Faibussowitsch { 408d24d4204SJose E. Roman Mat_Product *product = C->product; 409d24d4204SJose E. Roman 410d24d4204SJose E. Roman PetscFunctionBegin; 411d24d4204SJose E. Roman switch (product->type) { 412d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 413e0d15688SBlanca Mellado Pinto C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 414e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_AB; 415e0d15688SBlanca Mellado Pinto break; 416e0d15688SBlanca Mellado Pinto case MATPRODUCT_AtB: 417e0d15688SBlanca Mellado Pinto C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK; 418e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_AtB; 419d71ae5a4SJacob Faibussowitsch break; 420d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 421e0d15688SBlanca Mellado Pinto C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 422e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_ABt; 423d71ae5a4SJacob Faibussowitsch break; 424d71ae5a4SJacob Faibussowitsch default: 425d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 426d24d4204SJose E. Roman } 4273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 428d24d4204SJose E. Roman } 429d24d4204SJose E. Roman 430d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 431d71ae5a4SJacob Faibussowitsch { 432d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 433d24d4204SJose E. Roman PetscScalar *darray, *d2d, v; 434d24d4204SJose E. Roman const PetscInt *ranges; 435d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 436d24d4204SJose E. Roman 437d24d4204SJose E. Roman PetscFunctionBegin; 4389566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray)); 439d24d4204SJose E. Roman 440d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */ 441d24d4204SJose E. Roman 442d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4439566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 4449566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 4456497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); 446792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 447d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 448d24d4204SJose E. Roman 449d24d4204SJose E. Roman /* allocate 2d vector */ 450d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 4519566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 4526497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); 453d24d4204SJose E. Roman 454d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 455792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 456d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 457d24d4204SJose E. Roman 458d24d4204SJose E. Roman /* collect diagonal */ 459d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) { 460792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 461792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 462d24d4204SJose E. Roman } 463d24d4204SJose E. Roman 464d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 465792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 4669566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 467d24d4204SJose E. Roman 468d24d4204SJose E. Roman } else { /* column version */ 469d24d4204SJose E. Roman 470d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4719566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 473d24d4204SJose E. Roman dlld = 1; 474792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 475d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 476d24d4204SJose E. Roman 477d24d4204SJose E. Roman /* allocate 2d vector */ 478d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4799566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 480d24d4204SJose E. Roman 481d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 482792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 483d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 484d24d4204SJose E. Roman 485d24d4204SJose E. Roman /* collect diagonal */ 486d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) { 487792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 488792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 489d24d4204SJose E. Roman } 490d24d4204SJose E. Roman 491d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 492792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 4939566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 494d24d4204SJose E. Roman } 495d24d4204SJose E. Roman 4969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray)); 4979566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4989566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 4993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 500d24d4204SJose E. Roman } 501d24d4204SJose E. Roman 502d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 503d71ae5a4SJacob Faibussowitsch { 504d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 505d24d4204SJose E. Roman const PetscScalar *d; 506d24d4204SJose E. Roman const PetscInt *ranges; 507d24d4204SJose E. Roman PetscScalar *d2d; 508d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 509d24d4204SJose E. Roman 510d24d4204SJose E. Roman PetscFunctionBegin; 511d24d4204SJose E. Roman if (R) { 512eb8278ccSPierre Jolivet PetscCall(VecGetArrayRead(R, &d)); 513d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5149566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 5159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 516d24d4204SJose E. Roman dlld = 1; 517792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 518d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 519d24d4204SJose E. Roman 520d24d4204SJose E. Roman /* allocate 2d vector */ 521d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 5229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 523d24d4204SJose E. Roman 524d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 525792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 526d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 527d24d4204SJose E. Roman 528d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 529c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 530d24d4204SJose E. Roman 531d24d4204SJose E. Roman /* broadcast along process columns */ 532d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 533d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 534d24d4204SJose E. Roman 535d24d4204SJose E. Roman /* local scaling */ 5369371c9d4SSatish Balay for (j = 0; j < a->locc; j++) 5379371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 538d24d4204SJose E. Roman 5399566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 540eb8278ccSPierre Jolivet PetscCall(VecRestoreArrayRead(R, &d)); 541d24d4204SJose E. Roman } 542d24d4204SJose E. Roman if (L) { 543eb8278ccSPierre Jolivet PetscCall(VecGetArrayRead(L, &d)); 544d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5459566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 5469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 5476497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); 548792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 549d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 550d24d4204SJose E. Roman 551d24d4204SJose E. Roman /* allocate 2d vector */ 552d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 5539566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 5546497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); 555d24d4204SJose E. Roman 556d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 557792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 558d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 559d24d4204SJose E. Roman 560d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 561c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 562d24d4204SJose E. Roman 563d24d4204SJose E. Roman /* broadcast along process rows */ 564d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 565d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 566d24d4204SJose E. Roman 567d24d4204SJose E. Roman /* local scaling */ 5689371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 5699371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 570d24d4204SJose E. Roman 5719566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 572eb8278ccSPierre Jolivet PetscCall(VecRestoreArrayRead(L, &d)); 573d24d4204SJose E. Roman } 5743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 575d24d4204SJose E. Roman } 576d24d4204SJose E. Roman 577d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 578d71ae5a4SJacob Faibussowitsch { 579d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 580d24d4204SJose E. Roman PetscBLASInt n, one = 1; 581d24d4204SJose E. Roman 582d24d4204SJose E. Roman PetscFunctionBegin; 583d24d4204SJose E. Roman n = x->lld * x->locc; 584792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 5853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 586d24d4204SJose E. Roman } 587d24d4204SJose E. Roman 588d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 589d71ae5a4SJacob Faibussowitsch { 590d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 591d24d4204SJose E. Roman PetscBLASInt i, n; 592d24d4204SJose E. Roman PetscScalar v; 593d24d4204SJose E. Roman 594d24d4204SJose E. Roman PetscFunctionBegin; 595d24d4204SJose E. Roman n = PetscMin(x->M, x->N); 596d24d4204SJose E. Roman for (i = 1; i <= n; i++) { 597792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 598d24d4204SJose E. Roman v += alpha; 599792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 600d24d4204SJose E. Roman } 6013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 602d24d4204SJose E. Roman } 603d24d4204SJose E. Roman 604d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 605d71ae5a4SJacob Faibussowitsch { 606d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 607d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 608d24d4204SJose E. Roman PetscBLASInt one = 1; 609d24d4204SJose E. Roman PetscScalar beta = 1.0; 610d24d4204SJose E. Roman 611d24d4204SJose E. Roman PetscFunctionBegin; 612d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3); 613792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 6149566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 6153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 616d24d4204SJose E. Roman } 617d24d4204SJose E. Roman 618d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 619d71ae5a4SJacob Faibussowitsch { 620d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 621d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 622d24d4204SJose E. Roman 623d24d4204SJose E. Roman PetscFunctionBegin; 6249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 6259566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 6263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 627d24d4204SJose E. Roman } 628d24d4204SJose E. Roman 629d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 630d71ae5a4SJacob Faibussowitsch { 631d24d4204SJose E. Roman Mat Bs; 632d24d4204SJose E. Roman MPI_Comm comm; 633d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 634d24d4204SJose E. Roman 635d24d4204SJose E. Roman PetscFunctionBegin; 6369566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 6379566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs)); 6389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 6399566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK)); 640d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 641d24d4204SJose E. Roman b->M = a->M; 642d24d4204SJose E. Roman b->N = a->N; 643d24d4204SJose E. Roman b->mb = a->mb; 644d24d4204SJose E. Roman b->nb = a->nb; 645d24d4204SJose E. Roman b->rsrc = a->rsrc; 646d24d4204SJose E. Roman b->csrc = a->csrc; 6479566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 648d24d4204SJose E. Roman *B = Bs; 64948a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 650d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 652d24d4204SJose E. Roman } 653d24d4204SJose E. Roman 654d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 655d71ae5a4SJacob Faibussowitsch { 656d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 657d24d4204SJose E. Roman Mat Bs = *B; 658d24d4204SJose E. Roman PetscBLASInt one = 1; 659d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 660d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 661d24d4204SJose E. Roman PetscInt i, j; 662d24d4204SJose E. Roman #endif 663d24d4204SJose E. Roman 664d24d4204SJose E. Roman PetscFunctionBegin; 6657fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 6660fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 6679566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 668d24d4204SJose E. Roman *B = Bs; 669d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 670792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 671d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 672d24d4204SJose E. Roman /* undo conjugation */ 6739371c9d4SSatish Balay for (i = 0; i < b->locr; i++) 6749371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 675d24d4204SJose E. Roman #endif 676d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 678d24d4204SJose E. Roman } 679d24d4204SJose E. Roman 680d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 681d71ae5a4SJacob Faibussowitsch { 682d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 683d24d4204SJose E. Roman PetscInt i, j; 684d24d4204SJose E. Roman 685d24d4204SJose E. Roman PetscFunctionBegin; 6869371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 6879371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 6883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 689d24d4204SJose E. Roman } 690d24d4204SJose E. Roman 691d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 692d71ae5a4SJacob Faibussowitsch { 693d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 694d24d4204SJose E. Roman Mat Bs = *B; 695d24d4204SJose E. Roman PetscBLASInt one = 1; 696d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 697d24d4204SJose E. Roman 698d24d4204SJose E. Roman PetscFunctionBegin; 6990fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 7009566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 701d24d4204SJose E. Roman *B = Bs; 702d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 703792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 704d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 7053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 706d24d4204SJose E. Roman } 707d24d4204SJose E. Roman 708d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 709d71ae5a4SJacob Faibussowitsch { 710d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 711d24d4204SJose E. Roman PetscScalar *x, *x2d; 712d24d4204SJose E. Roman const PetscInt *ranges; 713d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 714d24d4204SJose E. Roman 715d24d4204SJose E. Roman PetscFunctionBegin; 7169566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X)); 7179566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 718d24d4204SJose E. Roman 719d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 7209566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 7219566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 7226497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); 723792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 724d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 725d24d4204SJose E. Roman 726d24d4204SJose E. Roman /* allocate 2d vector */ 727d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 7289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d)); 7296497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); 730d24d4204SJose E. Roman 731d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 732792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 733d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 734d24d4204SJose E. Roman 735d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 736792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 737d24d4204SJose E. Roman 738d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 739d24d4204SJose E. Roman switch (A->factortype) { 740d24d4204SJose E. Roman case MAT_FACTOR_LU: 741792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 742d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 743d24d4204SJose E. Roman break; 744d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 745792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 746d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 747d24d4204SJose E. Roman break; 748d71ae5a4SJacob Faibussowitsch default: 749d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 750d24d4204SJose E. Roman } 751d24d4204SJose E. Roman 752d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 753792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 754d24d4204SJose E. Roman 7559566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 7569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 7573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 758d24d4204SJose E. Roman } 759d24d4204SJose E. Roman 760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 761d71ae5a4SJacob Faibussowitsch { 762d24d4204SJose E. Roman PetscFunctionBegin; 7639566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X)); 7649566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y)); 7653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 766d24d4204SJose E. Roman } 767d24d4204SJose E. Roman 768d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 769d71ae5a4SJacob Faibussowitsch { 7705d5af635SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x; 771d24d4204SJose E. Roman PetscBool flg1, flg2; 772d24d4204SJose E. Roman PetscBLASInt one = 1, info; 7735d5af635SJose E. Roman Mat C; 7745d5af635SJose E. Roman MatType type; 775d24d4204SJose E. Roman 776d24d4204SJose E. Roman PetscFunctionBegin; 7779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 7789566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 7795d5af635SJose E. Roman if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3); 7805d5af635SJose E. Roman if (flg2) { 7815d5af635SJose E. Roman if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 7825d5af635SJose E. Roman else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X)); 7835d5af635SJose E. Roman C = X; 7845d5af635SJose E. Roman } else { 7855d5af635SJose E. Roman PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C)); 7865d5af635SJose E. Roman } 7875d5af635SJose E. Roman x = (Mat_ScaLAPACK *)C->data; 788d24d4204SJose E. Roman 789d24d4204SJose E. Roman switch (A->factortype) { 790d24d4204SJose E. Roman case MAT_FACTOR_LU: 791792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 792d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 793d24d4204SJose E. Roman break; 794d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 795792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 796d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 797d24d4204SJose E. Roman break; 798d71ae5a4SJacob Faibussowitsch default: 799d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 800d24d4204SJose E. Roman } 8015d5af635SJose E. Roman if (!flg2) { 8025d5af635SJose E. Roman PetscCall(MatGetType(X, &type)); 8035d5af635SJose E. Roman PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X)); 8045d5af635SJose E. Roman PetscCall(MatDestroy(&C)); 8055d5af635SJose E. Roman } 8063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 807d24d4204SJose E. Roman } 808d24d4204SJose E. Roman 809d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 810d71ae5a4SJacob Faibussowitsch { 811d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 812d24d4204SJose E. Roman PetscBLASInt one = 1, info; 813d24d4204SJose E. Roman 814d24d4204SJose E. Roman PetscFunctionBegin; 815aa624791SPierre Jolivet if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 816792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 817d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info); 818d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 819d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 820d24d4204SJose E. Roman 8219566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 824d24d4204SJose E. Roman } 825d24d4204SJose E. Roman 826d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 827d71ae5a4SJacob Faibussowitsch { 828d24d4204SJose E. Roman PetscFunctionBegin; 8299566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8309566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 8313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 832d24d4204SJose E. Roman } 833d24d4204SJose E. Roman 834d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 835d71ae5a4SJacob Faibussowitsch { 836d24d4204SJose E. Roman PetscFunctionBegin; 837d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 839d24d4204SJose E. Roman } 840d24d4204SJose E. Roman 841d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 842d71ae5a4SJacob Faibussowitsch { 843d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 844d24d4204SJose E. Roman PetscBLASInt one = 1, info; 845d24d4204SJose E. Roman 846d24d4204SJose E. Roman PetscFunctionBegin; 847792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 848d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info); 849d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 850d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 851d24d4204SJose E. Roman 8529566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8539566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 855d24d4204SJose E. Roman } 856d24d4204SJose E. Roman 857d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 858d71ae5a4SJacob Faibussowitsch { 859d24d4204SJose E. Roman PetscFunctionBegin; 8609566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8619566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 8623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 863d24d4204SJose E. Roman } 864d24d4204SJose E. Roman 865d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 866d71ae5a4SJacob Faibussowitsch { 867d24d4204SJose E. Roman PetscFunctionBegin; 868d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 870d24d4204SJose E. Roman } 871d24d4204SJose E. Roman 87266976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 873d71ae5a4SJacob Faibussowitsch { 874d24d4204SJose E. Roman PetscFunctionBegin; 875d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 8763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 877d24d4204SJose E. Roman } 878d24d4204SJose E. Roman 879d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 880d71ae5a4SJacob Faibussowitsch { 881d24d4204SJose E. Roman Mat B; 88259172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 883d24d4204SJose E. Roman 884d24d4204SJose E. Roman PetscFunctionBegin; 885d24d4204SJose E. Roman /* Create the factorization matrix */ 8869566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 88766e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 888d24d4204SJose E. Roman B->factortype = ftype; 8899566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8909566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 891d24d4204SJose E. Roman 8929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 893d24d4204SJose E. Roman *F = B; 8943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 895d24d4204SJose E. Roman } 896d24d4204SJose E. Roman 897d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 898d71ae5a4SJacob Faibussowitsch { 899d24d4204SJose E. Roman PetscFunctionBegin; 9009566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 9019566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 9023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 903d24d4204SJose E. Roman } 904d24d4204SJose E. Roman 905d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 906d71ae5a4SJacob Faibussowitsch { 907d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 908d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0; 909d24d4204SJose E. Roman const char *ntype; 910d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy; 911d24d4204SJose E. Roman 912d24d4204SJose E. Roman PetscFunctionBegin; 913d24d4204SJose E. Roman switch (type) { 914d24d4204SJose E. Roman case NORM_1: 915d24d4204SJose E. Roman ntype = "1"; 916d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 917d24d4204SJose E. Roman break; 918d24d4204SJose E. Roman case NORM_FROBENIUS: 919d24d4204SJose E. Roman ntype = "F"; 920d24d4204SJose E. Roman work = &dummy; 921d24d4204SJose E. Roman break; 922d24d4204SJose E. Roman case NORM_INFINITY: 923d24d4204SJose E. Roman ntype = "I"; 924d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 925d24d4204SJose E. Roman break; 926d71ae5a4SJacob Faibussowitsch default: 927d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 928d24d4204SJose E. Roman } 9299566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 930d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 9319566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 9323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 933d24d4204SJose E. Roman } 934d24d4204SJose E. Roman 935d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 936d71ae5a4SJacob Faibussowitsch { 937d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 938d24d4204SJose E. Roman 939d24d4204SJose E. Roman PetscFunctionBegin; 9409566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 9413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 942d24d4204SJose E. Roman } 943d24d4204SJose E. Roman 944d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 945d71ae5a4SJacob Faibussowitsch { 946d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 947d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx; 948d24d4204SJose E. Roman 949d24d4204SJose E. Roman PetscFunctionBegin; 950d24d4204SJose E. Roman if (rows) { 951d24d4204SJose E. Roman n = a->locr; 952d24d4204SJose E. Roman nb = a->mb; 953d24d4204SJose E. Roman isrc = a->rsrc; 954d24d4204SJose E. Roman nproc = a->grid->nprow; 955d24d4204SJose E. Roman iproc = a->grid->myrow; 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 957d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9589566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 959d24d4204SJose E. Roman } 960d24d4204SJose E. Roman if (cols) { 961d24d4204SJose E. Roman n = a->locc; 962d24d4204SJose E. Roman nb = a->nb; 963d24d4204SJose E. Roman isrc = a->csrc; 964d24d4204SJose E. Roman nproc = a->grid->npcol; 965d24d4204SJose E. Roman iproc = a->grid->mycol; 9669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 967d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9689566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 969d24d4204SJose E. Roman } 9703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 971d24d4204SJose E. Roman } 972d24d4204SJose E. Roman 973d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 974d71ae5a4SJacob Faibussowitsch { 975d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 976d24d4204SJose E. Roman Mat Bmpi; 977d24d4204SJose E. Roman MPI_Comm comm; 978a00b6df3SJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 9794b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork; 9804b1a79daSJose E. Roman const PetscScalar *vwork; 981d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 982d24d4204SJose E. Roman PetscScalar *barray; 9834b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE; 9844b1a79daSJose E. Roman PetscMPIInt size; 985d24d4204SJose E. Roman 986d24d4204SJose E. Roman PetscFunctionBegin; 9879566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9889566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 9894b1a79daSJose E. Roman 9904b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9929566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 9939371c9d4SSatish Balay for (i = 0; i < size; i++) 9949371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) { 9959371c9d4SSatish Balay differ = PETSC_TRUE; 9969371c9d4SSatish Balay break; 9979371c9d4SSatish Balay } 9984b1a79daSJose E. Roman } 9994b1a79daSJose E. Roman 10004b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 10019566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 10024b1a79daSJose E. Roman m = PETSC_DECIDE; 10039566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 10044b1a79daSJose E. Roman n = PETSC_DECIDE; 10059566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10069566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10079566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 10089566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 10094b1a79daSJose E. Roman 10104b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10119566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1012a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 10136497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ 1014792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 10154b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info); 10164b1a79daSJose E. Roman 10174b1a79daSJose E. Roman /* redistribute matrix */ 10189566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1019792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10209566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 10219566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10229566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 10234b1a79daSJose E. Roman 10244b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 10259566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 10264b1a79daSJose E. Roman for (i = rstart; i < rend; i++) { 10279566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 10289566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 10299566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 10304b1a79daSJose E. Roman } 10319566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 10329566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 10339566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 10344b1a79daSJose E. Roman 10354b1a79daSJose E. Roman } else { /* normal cases */ 1036d24d4204SJose E. Roman 1037d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1038d24d4204SJose E. Roman else { 10399566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 104092c846b4SJose E. Roman m = PETSC_DECIDE; 10419566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 104292c846b4SJose E. Roman n = PETSC_DECIDE; 10439566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10459566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 10469566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1047d24d4204SJose E. Roman } 1048d24d4204SJose E. Roman 1049d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10509566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1051a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 10526497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ 1053792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1054d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1055d24d4204SJose E. Roman 1056d24d4204SJose E. Roman /* redistribute matrix */ 10579566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1058792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10599566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1060d24d4204SJose E. Roman 10619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1063ac530a7eSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi)); 1064ac530a7eSPierre Jolivet else *B = Bmpi; 10654b1a79daSJose E. Roman } 10663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1067d24d4204SJose E. Roman } 1068d24d4204SJose E. Roman 1069d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1070d71ae5a4SJacob Faibussowitsch { 1071b12397e7SPierre Jolivet const PetscInt *ranges; 1072b12397e7SPierre Jolivet PetscMPIInt size; 1073b12397e7SPierre Jolivet PetscInt i, n; 1074b12397e7SPierre Jolivet 1075b12397e7SPierre Jolivet PetscFunctionBegin; 1076b12397e7SPierre Jolivet *correct = PETSC_TRUE; 1077b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1078b12397e7SPierre Jolivet if (size > 1) { 1079b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges)); 1080b12397e7SPierre Jolivet n = ranges[1] - ranges[0]; 10819371c9d4SSatish Balay for (i = 1; i < size; i++) 10829371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break; 1083b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1084b12397e7SPierre Jolivet } 10853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1086b12397e7SPierre Jolivet } 1087b12397e7SPierre Jolivet 1088d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1089d71ae5a4SJacob Faibussowitsch { 1090d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1091d24d4204SJose E. Roman Mat Bmpi; 1092d24d4204SJose E. Roman MPI_Comm comm; 109392c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1094b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols; 1095d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1096c87776d3SJose E. Roman const PetscScalar *aarray; 1097b12397e7SPierre Jolivet IS ir, ic; 10984e8b52a1SJose E. Roman PetscInt lda; 1099b12397e7SPierre Jolivet PetscBool flg; 1100d24d4204SJose E. Roman 1101d24d4204SJose E. Roman PetscFunctionBegin; 11029566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1103d24d4204SJose E. Roman 1104d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1105d24d4204SJose E. Roman else { 11069566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 110792c846b4SJose E. Roman m = PETSC_DECIDE; 11089566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 110992c846b4SJose E. Roman n = PETSC_DECIDE; 11109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 11119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 11129566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 11139566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1114d24d4204SJose E. Roman } 1115d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data; 1116d24d4204SJose E. Roman 1117b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 1118c87776d3SJose E. Roman PetscCall(MatDenseGetArrayRead(A, &aarray)); 1119b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1120b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1121b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1122d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 11239566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 11249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 11256497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */ 1126792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1127d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1128d24d4204SJose E. Roman 1129d24d4204SJose E. Roman /* redistribute matrix */ 1130792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1131b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1132b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1133b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1134b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1135b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1136b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows)); 1137b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols)); 1138b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1139b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows)); 1140b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols)); 1141b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1142b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1143b12397e7SPierre Jolivet } 1144c87776d3SJose E. Roman PetscCall(MatDenseRestoreArrayRead(A, &aarray)); 11459566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 11469566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1147ac530a7eSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi)); 1148ac530a7eSPierre Jolivet else *B = Bmpi; 11493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1150d24d4204SJose E. Roman } 1151d24d4204SJose E. Roman 1152d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1153d71ae5a4SJacob Faibussowitsch { 1154d24d4204SJose E. Roman Mat mat_scal; 1155d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1156d24d4204SJose E. Roman const PetscInt *cols; 1157d24d4204SJose E. Roman const PetscScalar *vals; 1158d24d4204SJose E. Roman 1159d24d4204SJose E. Roman PetscFunctionBegin; 1160d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1161d24d4204SJose E. Roman mat_scal = *newmat; 11629566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1163d24d4204SJose E. Roman } else { 11649566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1165d24d4204SJose E. Roman m = PETSC_DECIDE; 11669566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1167d24d4204SJose E. Roman n = PETSC_DECIDE; 11689566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11699566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11709566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11719566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1172d24d4204SJose E. Roman } 1173d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11749566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11759566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 11769566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1177d24d4204SJose E. Roman } 11789566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11799566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1180d24d4204SJose E. Roman 11819566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1182d24d4204SJose E. Roman else *newmat = mat_scal; 11833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1184d24d4204SJose E. Roman } 1185d24d4204SJose E. Roman 1186d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1187d71ae5a4SJacob Faibussowitsch { 1188d24d4204SJose E. Roman Mat mat_scal; 1189d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1190d24d4204SJose E. Roman const PetscInt *cols; 1191d24d4204SJose E. Roman const PetscScalar *vals; 1192d24d4204SJose E. Roman PetscScalar v; 1193d24d4204SJose E. Roman 1194d24d4204SJose E. Roman PetscFunctionBegin; 1195d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1196d24d4204SJose E. Roman mat_scal = *newmat; 11979566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1198d24d4204SJose E. Roman } else { 11999566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1200d24d4204SJose E. Roman m = PETSC_DECIDE; 12019566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1202d24d4204SJose E. Roman n = PETSC_DECIDE; 12039566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 12049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 12059566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 12069566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1207d24d4204SJose E. Roman } 12089566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1209d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 12109566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 12119566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1212d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */ 1213d24d4204SJose E. Roman if (cols[j] == row) continue; 1214b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 12159566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1216d24d4204SJose E. Roman } 12179566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1218d24d4204SJose E. Roman } 12199566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 12209566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 12219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1222d24d4204SJose E. Roman 12239566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1224d24d4204SJose E. Roman else *newmat = mat_scal; 12253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1226d24d4204SJose E. Roman } 1227d24d4204SJose E. Roman 1228d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1229d71ae5a4SJacob Faibussowitsch { 1230d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1231d24d4204SJose E. Roman PetscInt sz = 0; 1232d24d4204SJose E. Roman 1233d24d4204SJose E. Roman PetscFunctionBegin; 12349566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12359566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1236d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1237d24d4204SJose E. Roman 12389566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12399566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 12409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc)); 1241d24d4204SJose E. Roman 1242d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 12433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1244d24d4204SJose E. Roman } 1245d24d4204SJose E. Roman 1246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1247d71ae5a4SJacob Faibussowitsch { 1248d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1249d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1250b8b5be36SMartin Diehl PetscMPIInt iflg; 1251d24d4204SJose E. Roman MPI_Comm icomm; 1252d24d4204SJose E. Roman 1253d24d4204SJose E. Roman PetscFunctionBegin; 12549566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 12559566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12569566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 12579566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1258b8b5be36SMartin Diehl PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg)); 1259d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1260d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1261d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1262d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 12639566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 12649566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1265d24d4204SJose E. Roman } 12669566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 12679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 12689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 12699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 12709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 12719566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 12723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1273d24d4204SJose E. Roman } 1274d24d4204SJose E. Roman 127566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1276d71ae5a4SJacob Faibussowitsch { 1277d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1278d24d4204SJose E. Roman PetscBLASInt info = 0; 1279b12397e7SPierre Jolivet PetscBool flg; 1280d24d4204SJose E. Roman 1281d24d4204SJose E. Roman PetscFunctionBegin; 12829566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12839566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1284d24d4204SJose E. Roman 1285b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1286b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1287b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1288b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1289b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1290d24d4204SJose E. Roman 1291d24d4204SJose E. Roman /* compute local sizes */ 12929566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 12939566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1294d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1295d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1296d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr); 1297d24d4204SJose E. Roman 1298d24d4204SJose E. Roman /* allocate local array */ 12999566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1300d24d4204SJose E. Roman 1301d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1302792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1303d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 13043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1305d24d4204SJose E. Roman } 1306d24d4204SJose E. Roman 130766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1308d71ae5a4SJacob Faibussowitsch { 1309d24d4204SJose E. Roman PetscInt nstash, reallocs; 1310d24d4204SJose E. Roman 1311d24d4204SJose E. Roman PetscFunctionBegin; 13123ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 13139566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 13149566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 13159566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 13163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1317d24d4204SJose E. Roman } 1318d24d4204SJose E. Roman 131966976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1320d71ae5a4SJacob Faibussowitsch { 1321d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1322d24d4204SJose E. Roman PetscMPIInt n; 1323d24d4204SJose E. Roman PetscInt i, flg, *row, *col; 1324d24d4204SJose E. Roman PetscScalar *val; 1325d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1326d24d4204SJose E. Roman 1327d24d4204SJose E. Roman PetscFunctionBegin; 13283ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1329d24d4204SJose E. Roman while (1) { 13309566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1331d24d4204SJose E. Roman if (!flg) break; 1332d24d4204SJose E. Roman for (i = 0; i < n; i++) { 13339566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 13349566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1335792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1336aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1337d24d4204SJose E. Roman switch (A->insertmode) { 1338d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 1339d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1340d71ae5a4SJacob Faibussowitsch break; 1341d71ae5a4SJacob Faibussowitsch case ADD_VALUES: 1342d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1343d71ae5a4SJacob Faibussowitsch break; 1344d71ae5a4SJacob Faibussowitsch default: 1345d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1346d24d4204SJose E. Roman } 1347d24d4204SJose E. Roman } 1348d24d4204SJose E. Roman } 13499566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 13503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1351d24d4204SJose E. Roman } 1352d24d4204SJose E. Roman 135366976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1354d71ae5a4SJacob Faibussowitsch { 1355d24d4204SJose E. Roman Mat Adense, As; 1356d24d4204SJose E. Roman MPI_Comm comm; 1357d24d4204SJose E. Roman 1358d24d4204SJose E. Roman PetscFunctionBegin; 13599566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 13609566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense)); 13619566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE)); 13629566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer)); 13639566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 13649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 13659566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As)); 13663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1367d24d4204SJose E. Roman } 1368d24d4204SJose E. Roman 13699371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 13704cc2b5b5SPierre Jolivet NULL, 13714cc2b5b5SPierre Jolivet NULL, 1372d24d4204SJose E. Roman MatMult_ScaLAPACK, 1373d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1374d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1375d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1376d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1377d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 13784cc2b5b5SPierre Jolivet NULL, 13794cc2b5b5SPierre Jolivet /*10*/ NULL, 1380d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1381d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 13824cc2b5b5SPierre Jolivet NULL, 1383d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1384d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 13854cc2b5b5SPierre Jolivet NULL, 1386d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1387d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1388d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1389d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1390d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1391d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1392d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 13934cc2b5b5SPierre Jolivet /*24*/ NULL, 1394d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1395d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1396d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1397d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1398d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 13994cc2b5b5SPierre Jolivet NULL, 14004cc2b5b5SPierre Jolivet NULL, 14014cc2b5b5SPierre Jolivet NULL, 14024cc2b5b5SPierre Jolivet NULL, 1403d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 14044cc2b5b5SPierre Jolivet NULL, 14054cc2b5b5SPierre Jolivet NULL, 14064cc2b5b5SPierre Jolivet NULL, 14074cc2b5b5SPierre Jolivet NULL, 1408d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 14094cc2b5b5SPierre Jolivet NULL, 14104cc2b5b5SPierre Jolivet NULL, 14114cc2b5b5SPierre Jolivet NULL, 1412d24d4204SJose E. Roman MatCopy_ScaLAPACK, 14134cc2b5b5SPierre Jolivet /*44*/ NULL, 1414d24d4204SJose E. Roman MatScale_ScaLAPACK, 1415d24d4204SJose E. Roman MatShift_ScaLAPACK, 14164cc2b5b5SPierre Jolivet NULL, 14174cc2b5b5SPierre Jolivet NULL, 14184cc2b5b5SPierre Jolivet /*49*/ NULL, 14194cc2b5b5SPierre Jolivet NULL, 14204cc2b5b5SPierre Jolivet NULL, 14214cc2b5b5SPierre Jolivet NULL, 14224cc2b5b5SPierre Jolivet NULL, 14234cc2b5b5SPierre Jolivet /*54*/ NULL, 14244cc2b5b5SPierre Jolivet NULL, 14254cc2b5b5SPierre Jolivet NULL, 14264cc2b5b5SPierre Jolivet NULL, 14274cc2b5b5SPierre Jolivet NULL, 14284cc2b5b5SPierre Jolivet /*59*/ NULL, 1429d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1430d24d4204SJose E. Roman MatView_ScaLAPACK, 14314cc2b5b5SPierre Jolivet NULL, 14324cc2b5b5SPierre Jolivet NULL, 14334cc2b5b5SPierre Jolivet /*64*/ NULL, 14344cc2b5b5SPierre Jolivet NULL, 14354cc2b5b5SPierre Jolivet NULL, 14364cc2b5b5SPierre Jolivet NULL, 14374cc2b5b5SPierre Jolivet NULL, 14384cc2b5b5SPierre Jolivet /*69*/ NULL, 1439d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 14404cc2b5b5SPierre Jolivet NULL, 14414cc2b5b5SPierre Jolivet NULL, 14428bb0f5c6SPierre Jolivet NULL, 14434cc2b5b5SPierre Jolivet /*74*/ NULL, 14444cc2b5b5SPierre Jolivet NULL, 14454cc2b5b5SPierre Jolivet NULL, 14464cc2b5b5SPierre Jolivet NULL, 14478bb0f5c6SPierre Jolivet MatLoad_ScaLAPACK, 14484cc2b5b5SPierre Jolivet /*79*/ NULL, 14494cc2b5b5SPierre Jolivet NULL, 14504cc2b5b5SPierre Jolivet NULL, 14514cc2b5b5SPierre Jolivet NULL, 14528bb0f5c6SPierre Jolivet NULL, 14534cc2b5b5SPierre Jolivet /*84*/ NULL, 1454d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 14554cc2b5b5SPierre Jolivet NULL, 14564cc2b5b5SPierre Jolivet NULL, 1457d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 14588bb0f5c6SPierre Jolivet /*89*/ NULL, 14598bb0f5c6SPierre Jolivet MatProductSetFromOptions_ScaLAPACK, 14604cc2b5b5SPierre Jolivet NULL, 14614cc2b5b5SPierre Jolivet NULL, 1462d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 14638bb0f5c6SPierre Jolivet /*94*/ NULL, 14644cc2b5b5SPierre Jolivet NULL, 14654cc2b5b5SPierre Jolivet NULL, 14664cc2b5b5SPierre Jolivet NULL, 14674cc2b5b5SPierre Jolivet NULL, 14688bb0f5c6SPierre Jolivet /*99*/ NULL, 14698bb0f5c6SPierre Jolivet MatMatSolve_ScaLAPACK, 14704cc2b5b5SPierre Jolivet NULL, 14714cc2b5b5SPierre Jolivet NULL, 14724cc2b5b5SPierre Jolivet NULL, 1473*421480d9SBarry Smith /*104*/ NULL, 14748bb0f5c6SPierre Jolivet NULL, 14758bb0f5c6SPierre Jolivet NULL, 14768bb0f5c6SPierre Jolivet NULL, 14778bb0f5c6SPierre Jolivet NULL, 14788bb0f5c6SPierre Jolivet /*109*/ NULL, 14798bb0f5c6SPierre Jolivet MatHermitianTranspose_ScaLAPACK, 14808bb0f5c6SPierre Jolivet MatMultHermitianTranspose_ScaLAPACK, 14818bb0f5c6SPierre Jolivet MatMultHermitianTransposeAdd_ScaLAPACK, 1482*421480d9SBarry Smith NULL, 14834cc2b5b5SPierre Jolivet /*114*/ NULL, 14844cc2b5b5SPierre Jolivet NULL, 14854cc2b5b5SPierre Jolivet NULL, 14864cc2b5b5SPierre Jolivet NULL, 14874cc2b5b5SPierre Jolivet NULL, 14884cc2b5b5SPierre Jolivet /*119*/ NULL, 14898bb0f5c6SPierre Jolivet NULL, 14908bb0f5c6SPierre Jolivet MatTransposeMatMultNumeric_ScaLAPACK, 14914cc2b5b5SPierre Jolivet NULL, 14924cc2b5b5SPierre Jolivet /*124*/ NULL, 14934cc2b5b5SPierre Jolivet NULL, 14944cc2b5b5SPierre Jolivet NULL, 14954cc2b5b5SPierre Jolivet NULL, 14964cc2b5b5SPierre Jolivet NULL, 14974cc2b5b5SPierre Jolivet /*129*/ NULL, 14984cc2b5b5SPierre Jolivet NULL, 14994cc2b5b5SPierre Jolivet NULL, 15008bb0f5c6SPierre Jolivet NULL, 15014cc2b5b5SPierre Jolivet NULL, 15024cc2b5b5SPierre Jolivet /*134*/ NULL, 15034cc2b5b5SPierre Jolivet NULL, 15044cc2b5b5SPierre Jolivet NULL, 15054cc2b5b5SPierre Jolivet NULL, 15064cc2b5b5SPierre Jolivet NULL, 15074cc2b5b5SPierre Jolivet NULL, 15084cc2b5b5SPierre Jolivet /*140*/ NULL, 15094cc2b5b5SPierre Jolivet NULL, 151003db1824SAlex Lindsay NULL, 1511c2be7ffeSStefano Zampini NULL, 15124cc2b5b5SPierre Jolivet NULL}; 1513d24d4204SJose E. Roman 1514d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1515d71ae5a4SJacob Faibussowitsch { 15166497c311SBarry Smith PetscInt *owner, *startv, *starti, bs2; 1517d24d4204SJose E. Roman PetscInt size = stash->size, nsends; 15186497c311SBarry Smith PetscInt *sindices, **rindices, j, l; 1519d24d4204SJose E. Roman PetscScalar **rvalues, *svalues; 1520d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1521d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 15226497c311SBarry Smith PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii; 1523d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy; 1524d24d4204SJose E. Roman PetscScalar *sp_val; 1525d24d4204SJose E. Roman PetscMatStashSpace space, space_next; 1526d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1527d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1528d24d4204SJose E. Roman 1529d24d4204SJose E. Roman PetscFunctionBegin; 1530d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1531d24d4204SJose E. Roman InsertMode addv; 1532462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 153308401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1534d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1535d24d4204SJose E. Roman } 1536d24d4204SJose E. Roman 1537d24d4204SJose E. Roman bs2 = stash->bs * stash->bs; 1538d24d4204SJose E. Roman 1539d24d4204SJose E. Roman /* first count number of contributors to each processor */ 15409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 15419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1542d24d4204SJose E. Roman 15436497c311SBarry Smith ii = j = 0; 1544d24d4204SJose E. Roman space = stash->space_head; 1545d24d4204SJose E. Roman while (space) { 1546d24d4204SJose E. Roman space_next = space->next; 1547d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 15489566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 15499566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1550792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1551d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 15529371c9d4SSatish Balay nlengths[j]++; 15536497c311SBarry Smith owner[ii] = j; 15546497c311SBarry Smith ii++; 1555d24d4204SJose E. Roman } 1556d24d4204SJose E. Roman space = space_next; 1557d24d4204SJose E. Roman } 1558d24d4204SJose E. Roman 1559d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 15609566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 15616497c311SBarry Smith nsends = 0; 15626497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 1563d24d4204SJose E. Roman if (nlengths[i]) { 15649371c9d4SSatish Balay sizes[i] = 1; 15659371c9d4SSatish Balay nsends++; 1566d24d4204SJose E. Roman } 1567d24d4204SJose E. Roman } 1568d24d4204SJose E. Roman 15699371c9d4SSatish Balay { 15709371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 15716497c311SBarry Smith 1572d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 15739566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 15746497c311SBarry Smith PetscCall(PetscMPIIntCast(nsends, &insends)); 15756497c311SBarry Smith PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths)); 1576d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 15776497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2; 15789566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1579d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 15806497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2); 15819566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 15829566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 15839371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 15849371c9d4SSatish Balay } 1585d24d4204SJose E. Roman 1586d24d4204SJose E. Roman /* do sends: 1587d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1588d24d4204SJose E. Roman the ith processor 1589d24d4204SJose E. Roman */ 15909566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 15919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 15929566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1593d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 15949371c9d4SSatish Balay startv[0] = 0; 15959371c9d4SSatish Balay starti[0] = 0; 15966497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) { 1597d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1]; 1598d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1599d24d4204SJose E. Roman } 1600d24d4204SJose E. Roman 16016497c311SBarry Smith ii = 0; 1602d24d4204SJose E. Roman space = stash->space_head; 1603d24d4204SJose E. Roman while (space) { 1604d24d4204SJose E. Roman space_next = space->next; 1605d24d4204SJose E. Roman sp_idx = space->idx; 1606d24d4204SJose E. Roman sp_idy = space->idy; 1607d24d4204SJose E. Roman sp_val = space->val; 1608d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 16096497c311SBarry Smith j = owner[ii]; 1610d24d4204SJose E. Roman if (bs2 == 1) { 1611d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1612d24d4204SJose E. Roman } else { 1613d24d4204SJose E. Roman PetscInt k; 1614d24d4204SJose E. Roman PetscScalar *buf1, *buf2; 1615d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j]; 1616d24d4204SJose E. Roman buf2 = space->val + bs2 * l; 1617d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1618d24d4204SJose E. Roman } 1619d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1620d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1621d24d4204SJose E. Roman startv[j]++; 1622d24d4204SJose E. Roman starti[j]++; 16236497c311SBarry Smith ii++; 1624d24d4204SJose E. Roman } 1625d24d4204SJose E. Roman space = space_next; 1626d24d4204SJose E. Roman } 1627d24d4204SJose E. Roman startv[0] = 0; 16286497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1629d24d4204SJose E. Roman 16306497c311SBarry Smith for (PetscMPIInt i = 0, count = 0; i < size; i++) { 1631d24d4204SJose E. Roman if (sizes[i]) { 16326497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 16336497c311SBarry Smith PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1634d24d4204SJose E. Roman } 1635d24d4204SJose E. Roman } 1636d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 16379566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 16386497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 16396497c311SBarry Smith if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1640d24d4204SJose E. Roman } 1641d24d4204SJose E. Roman #endif 16429566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 16439566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 16449566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 16459566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1646d24d4204SJose E. Roman 1647d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 16489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1649d24d4204SJose E. Roman 16506497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) { 1651d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i]; 1652d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i]; 1653d24d4204SJose E. Roman } 1654d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1655d24d4204SJose E. Roman 16569566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 16579566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1658d24d4204SJose E. Roman 1659d24d4204SJose E. Roman stash->svalues = svalues; 1660d24d4204SJose E. Roman stash->sindices = sindices; 1661d24d4204SJose E. Roman stash->rvalues = rvalues; 1662d24d4204SJose E. Roman stash->rindices = rindices; 1663d24d4204SJose E. Roman stash->send_waits = send_waits; 16646497c311SBarry Smith stash->nsends = (PetscMPIInt)nsends; 1665d24d4204SJose E. Roman stash->nrecvs = nreceives; 1666d24d4204SJose E. Roman stash->reproduce_count = 0; 16673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1668d24d4204SJose E. Roman } 1669d24d4204SJose E. Roman 1670d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1671d71ae5a4SJacob Faibussowitsch { 1672d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1673d24d4204SJose E. Roman 1674d24d4204SJose E. Roman PetscFunctionBegin; 167528b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1676aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1677aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 16789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 16799566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 16803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1681d24d4204SJose E. Roman } 1682d24d4204SJose E. Roman 1683d24d4204SJose E. Roman /*@ 16846aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 168511a5261eSBarry Smith the `MATSCALAPACK` matrix 1686d24d4204SJose E. Roman 1687c3339decSBarry Smith Logically Collective 1688d24d4204SJose E. Roman 1689d8d19677SJose E. Roman Input Parameters: 169011a5261eSBarry Smith + A - a `MATSCALAPACK` matrix 1691d24d4204SJose E. Roman . mb - the row block size 1692d24d4204SJose E. Roman - nb - the column block size 1693d24d4204SJose E. Roman 1694d24d4204SJose E. Roman Level: intermediate 1695d24d4204SJose E. Roman 16962ef1f0ffSBarry Smith Note: 16972ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 16982ef1f0ffSBarry Smith 16991cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1700d24d4204SJose E. Roman @*/ 1701d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1702d71ae5a4SJacob Faibussowitsch { 1703d24d4204SJose E. Roman PetscFunctionBegin; 1704d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1705d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2); 1706d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3); 1707cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 17083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1709d24d4204SJose E. Roman } 1710d24d4204SJose E. Roman 1711d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1712d71ae5a4SJacob Faibussowitsch { 1713d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1714d24d4204SJose E. Roman 1715d24d4204SJose E. Roman PetscFunctionBegin; 1716d24d4204SJose E. Roman if (mb) *mb = a->mb; 1717d24d4204SJose E. Roman if (nb) *nb = a->nb; 17183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1719d24d4204SJose E. Roman } 1720d24d4204SJose E. Roman 1721d24d4204SJose E. Roman /*@ 17226aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 172311a5261eSBarry Smith the `MATSCALAPACK` matrix 1724d24d4204SJose E. Roman 17252ef1f0ffSBarry Smith Not Collective 1726d24d4204SJose E. Roman 1727d24d4204SJose E. Roman Input Parameter: 172811a5261eSBarry Smith . A - a `MATSCALAPACK` matrix 1729d24d4204SJose E. Roman 1730d24d4204SJose E. Roman Output Parameters: 1731d24d4204SJose E. Roman + mb - the row block size 1732d24d4204SJose E. Roman - nb - the column block size 1733d24d4204SJose E. Roman 1734d24d4204SJose E. Roman Level: intermediate 1735d24d4204SJose E. Roman 17362ef1f0ffSBarry Smith Note: 17372ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 17382ef1f0ffSBarry Smith 17391cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1740d24d4204SJose E. Roman @*/ 1741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1742d71ae5a4SJacob Faibussowitsch { 1743d24d4204SJose E. Roman PetscFunctionBegin; 1744d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1745cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 17463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1747d24d4204SJose E. Roman } 1748d24d4204SJose E. Roman 1749d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1750d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1751d24d4204SJose E. Roman 1752d24d4204SJose E. Roman /*MC 1753d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1754d24d4204SJose E. Roman 17552ef1f0ffSBarry Smith Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1756d24d4204SJose E. Roman 1757d24d4204SJose E. Roman Options Database Keys: 17582ef1f0ffSBarry Smith + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 17592ef1f0ffSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1760d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1761d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1762d24d4204SJose E. Roman 17632ef1f0ffSBarry Smith Level: intermediate 17642ef1f0ffSBarry Smith 176589bba20eSBarry Smith Note: 176689bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 176789bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 176889bba20eSBarry Smith the given rank. 176989bba20eSBarry Smith 17701cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1771d24d4204SJose E. Roman M*/ 1772d24d4204SJose E. Roman 1773d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1774d71ae5a4SJacob Faibussowitsch { 1775d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1776b8b5be36SMartin Diehl PetscBool flg; 1777b8b5be36SMartin Diehl PetscMPIInt iflg; 1778d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1779d24d4204SJose E. Roman MPI_Comm icomm; 1780d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol; 1781d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0}; 1782d24d4204SJose E. Roman PetscMPIInt size; 1783d24d4204SJose E. Roman 1784d24d4204SJose E. Roman PetscFunctionBegin; 1785aea10558SJacob Faibussowitsch A->ops[0] = MatOps_Values; 1786d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1787d24d4204SJose E. Roman 17889566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1789d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1790d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1791d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1792d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1793d24d4204SJose E. Roman 17944dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1795d24d4204SJose E. Roman A->data = (void *)a; 1796d24d4204SJose E. Roman 1797d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1798d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1799c8025a54SPierre Jolivet PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL)); 18009566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 18019566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1802d24d4204SJose E. Roman } 18039566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1804b8b5be36SMartin Diehl PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg)); 1805b8b5be36SMartin Diehl if (!iflg) { 18064dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&grid)); 1807d24d4204SJose E. Roman 18089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size)); 18096497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow)); 1810d24d4204SJose E. Roman 1811d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1812b8b5be36SMartin Diehl PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg)); 1813b8b5be36SMartin Diehl if (flg) { 181408401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 18156497c311SBarry Smith PetscCall(PetscBLASIntCast(optv1, &grid->nprow)); 1816d24d4204SJose E. Roman } 1817d0609cedSBarry Smith PetscOptionsEnd(); 1818d24d4204SJose E. Roman 1819d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1820d24d4204SJose E. Roman grid->npcol = size / grid->nprow; 18219566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 18229566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1823f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1824d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1825d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1826d24d4204SJose E. Roman grid->grid_refct = 1; 1827d24d4204SJose E. Roman grid->nprow = nprow; 1828d24d4204SJose E. Roman grid->npcol = npcol; 1829d24d4204SJose E. Roman grid->myrow = myrow; 1830d24d4204SJose E. Roman grid->mycol = mycol; 1831d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1832f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1833d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1834f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1835d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 18369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1837d24d4204SJose E. Roman 1838d24d4204SJose E. Roman } else grid->grid_refct++; 18399566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1840d24d4204SJose E. Roman a->grid = grid; 1841d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1842d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1843d24d4204SJose E. Roman 1844d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 18459566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1846d24d4204SJose E. Roman if (flg) { 18476497c311SBarry Smith a->mb = (PetscMPIInt)array[0]; 18486497c311SBarry Smith a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb; 1849d24d4204SJose E. Roman } 1850d0609cedSBarry Smith PetscOptionsEnd(); 1851d24d4204SJose E. Roman 1852b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 18539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 18549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 18559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 18569566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 18573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1858d24d4204SJose E. Roman } 1859d24d4204SJose E. Roman 1860d24d4204SJose E. Roman /*@C 1861d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 186211a5261eSBarry Smith (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1863d24d4204SJose E. Roman 1864d24d4204SJose E. Roman Collective 1865d24d4204SJose E. Roman 1866d24d4204SJose E. Roman Input Parameters: 1867d24d4204SJose E. Roman + comm - MPI communicator 186811a5261eSBarry Smith . mb - row block size (or `PETSC_DECIDE` to have it set) 186911a5261eSBarry Smith . nb - column block size (or `PETSC_DECIDE` to have it set) 1870d24d4204SJose E. Roman . M - number of global rows 1871d24d4204SJose E. Roman . N - number of global columns 1872d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1873d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1874d24d4204SJose E. Roman 1875d24d4204SJose E. Roman Output Parameter: 1876d24d4204SJose E. Roman . A - the matrix 1877d24d4204SJose E. Roman 187811a5261eSBarry Smith Options Database Key: 1879d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1880d24d4204SJose E. Roman 18812ef1f0ffSBarry Smith Level: intermediate 18822ef1f0ffSBarry Smith 18832ef1f0ffSBarry Smith Notes: 18842ef1f0ffSBarry Smith If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 18852ef1f0ffSBarry Smith 188611a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1887d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 188811a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1889d24d4204SJose E. Roman 189046091a0eSPierre Jolivet Storage is completely managed by ScaLAPACK, so this requires PETSc to be 1891d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1892d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 189311a5261eSBarry Smith used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1894d24d4204SJose E. Roman 18951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1896d24d4204SJose E. Roman @*/ 1897d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1898d71ae5a4SJacob Faibussowitsch { 1899d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1900d24d4204SJose E. Roman PetscInt m, n; 1901d24d4204SJose E. Roman 1902d24d4204SJose E. Roman PetscFunctionBegin; 19039566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 19049566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK)); 1905aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1906d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1907d24d4204SJose E. Roman m = PETSC_DECIDE; 19089566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1909d24d4204SJose E. Roman n = PETSC_DECIDE; 19109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 19119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1912d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data; 19139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M)); 19149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N)); 19159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 19169566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 19179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 19189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 19199566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 19203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1921d24d4204SJose E. Roman } 1922