1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23d71ae5a4SJacob Faibussowitsch { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31d71ae5a4SJacob Faibussowitsch { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39d24d4204SJose E. Roman if (iascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59d71ae5a4SJacob Faibussowitsch { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 72462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 76462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 844dfa11a4SJacob Faibussowitsch info->memory = 0; /* REVIEW ME */ 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92d71ae5a4SJacob Faibussowitsch { 93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94b12397e7SPierre Jolivet 95d24d4204SJose E. Roman PetscFunctionBegin; 96d24d4204SJose E. Roman switch (op) { 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 100d24d4204SJose E. Roman case MAT_SYMMETRIC: 101d24d4204SJose E. Roman case MAT_SORTED_FULL: 102d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 103d71ae5a4SJacob Faibussowitsch break; 104d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 105d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 106d71ae5a4SJacob Faibussowitsch break; 107d71ae5a4SJacob Faibussowitsch default: 108d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109d24d4204SJose E. Roman } 1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman 113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114d71ae5a4SJacob Faibussowitsch { 115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116d24d4204SJose E. Roman PetscInt i, j; 117d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 119d24d4204SJose E. Roman 120d24d4204SJose E. Roman PetscFunctionBegin; 121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122d24d4204SJose E. Roman for (i = 0; i < nr; i++) { 123d24d4204SJose E. Roman if (rows[i] < 0) continue; 1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125d24d4204SJose E. Roman for (j = 0; j < nc; j++) { 126d24d4204SJose E. Roman if (cols[j] < 0) continue; 1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130b12397e7SPierre Jolivet if (roworiented) { 131d24d4204SJose E. Roman switch (imode) { 132d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 133d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134d71ae5a4SJacob Faibussowitsch break; 135d71ae5a4SJacob Faibussowitsch default: 136d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137d71ae5a4SJacob Faibussowitsch break; 138b12397e7SPierre Jolivet } 139b12397e7SPierre Jolivet } else { 140b12397e7SPierre Jolivet switch (imode) { 141d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 142d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143d71ae5a4SJacob Faibussowitsch break; 144d71ae5a4SJacob Faibussowitsch default: 145d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146d71ae5a4SJacob Faibussowitsch break; 147b12397e7SPierre Jolivet } 148d24d4204SJose E. Roman } 149d24d4204SJose E. Roman } else { 15028b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 152b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153d24d4204SJose E. Roman } 154d24d4204SJose E. Roman } 155d24d4204SJose E. Roman } 1563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 157d24d4204SJose E. Roman } 158d24d4204SJose E. Roman 1595e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160d71ae5a4SJacob Faibussowitsch { 161d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0; 163d24d4204SJose E. Roman const PetscInt *ranges; 164d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman PetscFunctionBegin; 167d24d4204SJose E. Roman if (transpose) { 168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 1716497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); 172792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1749566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176d24d4204SJose E. Roman ylld = 1; 177792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 179d24d4204SJose E. Roman 180d24d4204SJose E. Roman /* allocate 2d vectors */ 181d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 1846497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 189792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 191d24d4204SJose E. Roman 192d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 193c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194d24d4204SJose E. Roman 195d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 196792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* call PBLAS subroutine */ 1995e4d33a3SBlanca Mellado Pinto if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 2005e4d33a3SBlanca Mellado Pinto else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 201d24d4204SJose E. Roman 202d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 203792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 204d24d4204SJose E. Roman 205d24d4204SJose E. Roman } else { /* non-transpose */ 206d24d4204SJose E. Roman 207d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 2089566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 2099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 210d24d4204SJose E. Roman xlld = 1; 211792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 212d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 2139566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 2149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 2156497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld)); 216792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 217d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 218d24d4204SJose E. Roman 219d24d4204SJose E. Roman /* allocate 2d vectors */ 220d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 221d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 2229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 2236497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld)); 224d24d4204SJose E. Roman 225d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 226792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 227d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 228792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 229d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 230d24d4204SJose E. Roman 231d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 232c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 233d24d4204SJose E. Roman 234d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 235792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 236d24d4204SJose E. Roman 237d24d4204SJose E. Roman /* call PBLAS subroutine */ 238792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 239d24d4204SJose E. Roman 240d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 241792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 242d24d4204SJose E. Roman } 2439566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d)); 2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 245d24d4204SJose E. Roman } 246d24d4204SJose E. Roman 247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 248d71ae5a4SJacob Faibussowitsch { 249d24d4204SJose E. Roman const PetscScalar *xarray; 250d24d4204SJose E. Roman PetscScalar *yarray; 251d24d4204SJose E. Roman 252d24d4204SJose E. Roman PetscFunctionBegin; 2539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2549566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2555e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); 2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259d24d4204SJose E. Roman } 260d24d4204SJose E. Roman 261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 262d71ae5a4SJacob Faibussowitsch { 263d24d4204SJose E. Roman const PetscScalar *xarray; 264d24d4204SJose E. Roman PetscScalar *yarray; 265d24d4204SJose E. Roman 266d24d4204SJose E. Roman PetscFunctionBegin; 2679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2695e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 273d24d4204SJose E. Roman } 274d24d4204SJose E. Roman 2755e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 2765e4d33a3SBlanca Mellado Pinto { 2775e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray; 2785e4d33a3SBlanca Mellado Pinto PetscScalar *yarray; 2795e4d33a3SBlanca Mellado Pinto 2805e4d33a3SBlanca Mellado Pinto PetscFunctionBegin; 2815e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray)); 2825e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayWrite(y, &yarray)); 2835e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); 2845e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray)); 2855e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayWrite(y, &yarray)); 2865e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 2875e4d33a3SBlanca Mellado Pinto } 2885e4d33a3SBlanca Mellado Pinto 289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290d71ae5a4SJacob Faibussowitsch { 291d24d4204SJose E. Roman const PetscScalar *xarray; 292d24d4204SJose E. Roman PetscScalar *zarray; 293d24d4204SJose E. Roman 294d24d4204SJose E. Roman PetscFunctionBegin; 2959566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2985e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); 2999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 302d24d4204SJose E. Roman } 303d24d4204SJose E. Roman 304d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 305d71ae5a4SJacob Faibussowitsch { 306d24d4204SJose E. Roman const PetscScalar *xarray; 307d24d4204SJose E. Roman PetscScalar *zarray; 308d24d4204SJose E. Roman 309d24d4204SJose E. Roman PetscFunctionBegin; 3109566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 3119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 3129566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 3135e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); 3145e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray)); 3155e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArray(z, &zarray)); 3165e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 3175e4d33a3SBlanca Mellado Pinto } 3185e4d33a3SBlanca Mellado Pinto 3195e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 3205e4d33a3SBlanca Mellado Pinto { 3215e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray; 3225e4d33a3SBlanca Mellado Pinto PetscScalar *zarray; 3235e4d33a3SBlanca Mellado Pinto 3245e4d33a3SBlanca Mellado Pinto PetscFunctionBegin; 3255e4d33a3SBlanca Mellado Pinto if (y != z) PetscCall(VecCopy(y, z)); 3265e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray)); 3275e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArray(z, &zarray)); 3285e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); 3299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 332d24d4204SJose E. Roman } 333d24d4204SJose E. Roman 334d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 335d71ae5a4SJacob Faibussowitsch { 336d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 337d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 338d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 339d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 340d24d4204SJose E. Roman PetscBLASInt one = 1; 341d24d4204SJose E. Roman 342d24d4204SJose E. Roman PetscFunctionBegin; 343792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 344d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 346d24d4204SJose E. Roman } 347d24d4204SJose E. Roman 348d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 349d71ae5a4SJacob Faibussowitsch { 350d24d4204SJose E. Roman PetscFunctionBegin; 3519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3529566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3539566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 354d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 3553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 356d24d4204SJose E. Roman } 357d24d4204SJose E. Roman 358e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 359e0d15688SBlanca Mellado Pinto { 360e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 361e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 362e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 363e0d15688SBlanca Mellado Pinto PetscScalar sone = 1.0, zero = 0.0; 364e0d15688SBlanca Mellado Pinto PetscBLASInt one = 1; 365e0d15688SBlanca Mellado Pinto 366e0d15688SBlanca Mellado Pinto PetscFunctionBegin; 367e0d15688SBlanca Mellado Pinto PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 368e0d15688SBlanca Mellado Pinto C->assembled = PETSC_TRUE; 369e0d15688SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 370e0d15688SBlanca Mellado Pinto } 371e0d15688SBlanca Mellado Pinto 372e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 373e0d15688SBlanca Mellado Pinto { 374e0d15688SBlanca Mellado Pinto PetscFunctionBegin; 375e0d15688SBlanca Mellado Pinto PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 376e0d15688SBlanca Mellado Pinto PetscCall(MatSetType(C, MATSCALAPACK)); 377e0d15688SBlanca Mellado Pinto PetscCall(MatSetUp(C)); 378e0d15688SBlanca Mellado Pinto C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK; 379e0d15688SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 380e0d15688SBlanca Mellado Pinto } 381e0d15688SBlanca Mellado Pinto 382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 383d71ae5a4SJacob Faibussowitsch { 384d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 385d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 386d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 387d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 388d24d4204SJose E. Roman PetscBLASInt one = 1; 389d24d4204SJose E. Roman 390d24d4204SJose E. Roman PetscFunctionBegin; 391792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 392d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 394d24d4204SJose E. Roman } 395d24d4204SJose E. Roman 396d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 397d71ae5a4SJacob Faibussowitsch { 398d24d4204SJose E. Roman PetscFunctionBegin; 3999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 4009566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 4019566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 402e0d15688SBlanca Mellado Pinto C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK; 4033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 404d24d4204SJose E. Roman } 405d24d4204SJose E. Roman 406d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 407d71ae5a4SJacob Faibussowitsch { 408d24d4204SJose E. Roman Mat_Product *product = C->product; 409d24d4204SJose E. Roman 410d24d4204SJose E. Roman PetscFunctionBegin; 411d24d4204SJose E. Roman switch (product->type) { 412d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 413e0d15688SBlanca Mellado Pinto C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 414e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_AB; 415e0d15688SBlanca Mellado Pinto break; 416e0d15688SBlanca Mellado Pinto case MATPRODUCT_AtB: 417e0d15688SBlanca Mellado Pinto C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK; 418e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_AtB; 419d71ae5a4SJacob Faibussowitsch break; 420d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 421e0d15688SBlanca Mellado Pinto C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 422e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_ABt; 423d71ae5a4SJacob Faibussowitsch break; 424d71ae5a4SJacob Faibussowitsch default: 425d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 426d24d4204SJose E. Roman } 4273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 428d24d4204SJose E. Roman } 429d24d4204SJose E. Roman 430d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 431d71ae5a4SJacob Faibussowitsch { 432d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 433d24d4204SJose E. Roman PetscScalar *darray, *d2d, v; 434d24d4204SJose E. Roman const PetscInt *ranges; 435d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 436d24d4204SJose E. Roman 437d24d4204SJose E. Roman PetscFunctionBegin; 4389566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray)); 439d24d4204SJose E. Roman 440d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */ 441d24d4204SJose E. Roman 442d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4439566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 4449566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 4456497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); 446792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 447d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 448d24d4204SJose E. Roman 449d24d4204SJose E. Roman /* allocate 2d vector */ 450d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 4519566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 4526497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); 453d24d4204SJose E. Roman 454d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 455792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 456d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 457d24d4204SJose E. Roman 458d24d4204SJose E. Roman /* collect diagonal */ 459d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) { 460792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 461792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 462d24d4204SJose E. Roman } 463d24d4204SJose E. Roman 464d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 465792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 4669566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 467d24d4204SJose E. Roman 468d24d4204SJose E. Roman } else { /* column version */ 469d24d4204SJose E. Roman 470d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4719566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 473d24d4204SJose E. Roman dlld = 1; 474792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 475d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 476d24d4204SJose E. Roman 477d24d4204SJose E. Roman /* allocate 2d vector */ 478d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4799566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 480d24d4204SJose E. Roman 481d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 482792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 483d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 484d24d4204SJose E. Roman 485d24d4204SJose E. Roman /* collect diagonal */ 486d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) { 487792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 488792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 489d24d4204SJose E. Roman } 490d24d4204SJose E. Roman 491d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 492792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 4939566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 494d24d4204SJose E. Roman } 495d24d4204SJose E. Roman 4969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray)); 4979566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4989566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 4993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 500d24d4204SJose E. Roman } 501d24d4204SJose E. Roman 502d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 503d71ae5a4SJacob Faibussowitsch { 504d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 505d24d4204SJose E. Roman const PetscScalar *d; 506d24d4204SJose E. Roman const PetscInt *ranges; 507d24d4204SJose E. Roman PetscScalar *d2d; 508d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 509d24d4204SJose E. Roman 510d24d4204SJose E. Roman PetscFunctionBegin; 511d24d4204SJose E. Roman if (R) { 512eb8278ccSPierre Jolivet PetscCall(VecGetArrayRead(R, &d)); 513d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5149566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 5159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 516d24d4204SJose E. Roman dlld = 1; 517792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 518d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 519d24d4204SJose E. Roman 520d24d4204SJose E. Roman /* allocate 2d vector */ 521d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 5229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 523d24d4204SJose E. Roman 524d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 525792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 526d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 527d24d4204SJose E. Roman 528d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 529c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 530d24d4204SJose E. Roman 531d24d4204SJose E. Roman /* broadcast along process columns */ 532d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 533d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 534d24d4204SJose E. Roman 535d24d4204SJose E. Roman /* local scaling */ 5369371c9d4SSatish Balay for (j = 0; j < a->locc; j++) 5379371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 538d24d4204SJose E. Roman 5399566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 540eb8278ccSPierre Jolivet PetscCall(VecRestoreArrayRead(R, &d)); 541d24d4204SJose E. Roman } 542d24d4204SJose E. Roman if (L) { 543eb8278ccSPierre Jolivet PetscCall(VecGetArrayRead(L, &d)); 544d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5459566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 5469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 5476497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); 548792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 549d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 550d24d4204SJose E. Roman 551d24d4204SJose E. Roman /* allocate 2d vector */ 552d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 5539566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 5546497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); 555d24d4204SJose E. Roman 556d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 557792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 558d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 559d24d4204SJose E. Roman 560d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 561c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 562d24d4204SJose E. Roman 563d24d4204SJose E. Roman /* broadcast along process rows */ 564d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 565d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 566d24d4204SJose E. Roman 567d24d4204SJose E. Roman /* local scaling */ 5689371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 5699371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 570d24d4204SJose E. Roman 5719566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 572eb8278ccSPierre Jolivet PetscCall(VecRestoreArrayRead(L, &d)); 573d24d4204SJose E. Roman } 5743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 575d24d4204SJose E. Roman } 576d24d4204SJose E. Roman 577d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 578d71ae5a4SJacob Faibussowitsch { 579d24d4204SJose E. Roman PetscFunctionBegin; 580d24d4204SJose E. Roman *missing = PETSC_FALSE; 5813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 582d24d4204SJose E. Roman } 583d24d4204SJose E. Roman 584d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 585d71ae5a4SJacob Faibussowitsch { 586d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 587d24d4204SJose E. Roman PetscBLASInt n, one = 1; 588d24d4204SJose E. Roman 589d24d4204SJose E. Roman PetscFunctionBegin; 590d24d4204SJose E. Roman n = x->lld * x->locc; 591792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 5923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 593d24d4204SJose E. Roman } 594d24d4204SJose E. Roman 595d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 596d71ae5a4SJacob Faibussowitsch { 597d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 598d24d4204SJose E. Roman PetscBLASInt i, n; 599d24d4204SJose E. Roman PetscScalar v; 600d24d4204SJose E. Roman 601d24d4204SJose E. Roman PetscFunctionBegin; 602d24d4204SJose E. Roman n = PetscMin(x->M, x->N); 603d24d4204SJose E. Roman for (i = 1; i <= n; i++) { 604792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 605d24d4204SJose E. Roman v += alpha; 606792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 607d24d4204SJose E. Roman } 6083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 609d24d4204SJose E. Roman } 610d24d4204SJose E. Roman 611d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 612d71ae5a4SJacob Faibussowitsch { 613d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 614d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 615d24d4204SJose E. Roman PetscBLASInt one = 1; 616d24d4204SJose E. Roman PetscScalar beta = 1.0; 617d24d4204SJose E. Roman 618d24d4204SJose E. Roman PetscFunctionBegin; 619d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3); 620792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 6219566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 6223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 623d24d4204SJose E. Roman } 624d24d4204SJose E. Roman 625d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 626d71ae5a4SJacob Faibussowitsch { 627d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 628d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 629d24d4204SJose E. Roman 630d24d4204SJose E. Roman PetscFunctionBegin; 6319566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 6329566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 6333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 634d24d4204SJose E. Roman } 635d24d4204SJose E. Roman 636d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 637d71ae5a4SJacob Faibussowitsch { 638d24d4204SJose E. Roman Mat Bs; 639d24d4204SJose E. Roman MPI_Comm comm; 640d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 641d24d4204SJose E. Roman 642d24d4204SJose E. Roman PetscFunctionBegin; 6439566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 6449566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs)); 6459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 6469566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK)); 647d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 648d24d4204SJose E. Roman b->M = a->M; 649d24d4204SJose E. Roman b->N = a->N; 650d24d4204SJose E. Roman b->mb = a->mb; 651d24d4204SJose E. Roman b->nb = a->nb; 652d24d4204SJose E. Roman b->rsrc = a->rsrc; 653d24d4204SJose E. Roman b->csrc = a->csrc; 6549566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 655d24d4204SJose E. Roman *B = Bs; 65648a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 657d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 659d24d4204SJose E. Roman } 660d24d4204SJose E. Roman 661d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 662d71ae5a4SJacob Faibussowitsch { 663d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 664d24d4204SJose E. Roman Mat Bs = *B; 665d24d4204SJose E. Roman PetscBLASInt one = 1; 666d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 667d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 668d24d4204SJose E. Roman PetscInt i, j; 669d24d4204SJose E. Roman #endif 670d24d4204SJose E. Roman 671d24d4204SJose E. Roman PetscFunctionBegin; 6727fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 6730fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 6749566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 675d24d4204SJose E. Roman *B = Bs; 676d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 677792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 678d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 679d24d4204SJose E. Roman /* undo conjugation */ 6809371c9d4SSatish Balay for (i = 0; i < b->locr; i++) 6819371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 682d24d4204SJose E. Roman #endif 683d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 685d24d4204SJose E. Roman } 686d24d4204SJose E. Roman 687d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 688d71ae5a4SJacob Faibussowitsch { 689d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 690d24d4204SJose E. Roman PetscInt i, j; 691d24d4204SJose E. Roman 692d24d4204SJose E. Roman PetscFunctionBegin; 6939371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 6949371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 6953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 696d24d4204SJose E. Roman } 697d24d4204SJose E. Roman 698d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 699d71ae5a4SJacob Faibussowitsch { 700d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 701d24d4204SJose E. Roman Mat Bs = *B; 702d24d4204SJose E. Roman PetscBLASInt one = 1; 703d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 704d24d4204SJose E. Roman 705d24d4204SJose E. Roman PetscFunctionBegin; 7060fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 7079566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 708d24d4204SJose E. Roman *B = Bs; 709d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 710792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 711d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 7123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 713d24d4204SJose E. Roman } 714d24d4204SJose E. Roman 715d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 716d71ae5a4SJacob Faibussowitsch { 717d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 718d24d4204SJose E. Roman PetscScalar *x, *x2d; 719d24d4204SJose E. Roman const PetscInt *ranges; 720d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 721d24d4204SJose E. Roman 722d24d4204SJose E. Roman PetscFunctionBegin; 7239566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X)); 7249566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 725d24d4204SJose E. Roman 726d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 7279566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 7289566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 7296497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); 730792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 731d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 732d24d4204SJose E. Roman 733d24d4204SJose E. Roman /* allocate 2d vector */ 734d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 7359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d)); 7366497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); 737d24d4204SJose E. Roman 738d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 739792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 740d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 741d24d4204SJose E. Roman 742d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 743792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 744d24d4204SJose E. Roman 745d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 746d24d4204SJose E. Roman switch (A->factortype) { 747d24d4204SJose E. Roman case MAT_FACTOR_LU: 748792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 749d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 750d24d4204SJose E. Roman break; 751d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 752792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 753d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 754d24d4204SJose E. Roman break; 755d71ae5a4SJacob Faibussowitsch default: 756d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 757d24d4204SJose E. Roman } 758d24d4204SJose E. Roman 759d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 760792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 761d24d4204SJose E. Roman 7629566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 7639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 7643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 765d24d4204SJose E. Roman } 766d24d4204SJose E. Roman 767d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 768d71ae5a4SJacob Faibussowitsch { 769d24d4204SJose E. Roman PetscFunctionBegin; 7709566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X)); 7719566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y)); 7723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 773d24d4204SJose E. Roman } 774d24d4204SJose E. Roman 775d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 776d71ae5a4SJacob Faibussowitsch { 7775d5af635SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x; 778d24d4204SJose E. Roman PetscBool flg1, flg2; 779d24d4204SJose E. Roman PetscBLASInt one = 1, info; 7805d5af635SJose E. Roman Mat C; 7815d5af635SJose E. Roman MatType type; 782d24d4204SJose E. Roman 783d24d4204SJose E. Roman PetscFunctionBegin; 7849566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 7859566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 7865d5af635SJose E. Roman if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3); 7875d5af635SJose E. Roman if (flg2) { 7885d5af635SJose E. Roman if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 7895d5af635SJose E. Roman else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X)); 7905d5af635SJose E. Roman C = X; 7915d5af635SJose E. Roman } else { 7925d5af635SJose E. Roman PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C)); 7935d5af635SJose E. Roman } 7945d5af635SJose E. Roman x = (Mat_ScaLAPACK *)C->data; 795d24d4204SJose E. Roman 796d24d4204SJose E. Roman switch (A->factortype) { 797d24d4204SJose E. Roman case MAT_FACTOR_LU: 798792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 799d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 800d24d4204SJose E. Roman break; 801d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 802792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 803d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 804d24d4204SJose E. Roman break; 805d71ae5a4SJacob Faibussowitsch default: 806d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 807d24d4204SJose E. Roman } 8085d5af635SJose E. Roman if (!flg2) { 8095d5af635SJose E. Roman PetscCall(MatGetType(X, &type)); 8105d5af635SJose E. Roman PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X)); 8115d5af635SJose E. Roman PetscCall(MatDestroy(&C)); 8125d5af635SJose E. Roman } 8133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 814d24d4204SJose E. Roman } 815d24d4204SJose E. Roman 816d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 817d71ae5a4SJacob Faibussowitsch { 818d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 819d24d4204SJose E. Roman PetscBLASInt one = 1, info; 820d24d4204SJose E. Roman 821d24d4204SJose E. Roman PetscFunctionBegin; 822aa624791SPierre Jolivet if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 823792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 824d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info); 825d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 826d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 827d24d4204SJose E. Roman 8289566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8299566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 831d24d4204SJose E. Roman } 832d24d4204SJose E. Roman 833d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 834d71ae5a4SJacob Faibussowitsch { 835d24d4204SJose E. Roman PetscFunctionBegin; 8369566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8379566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 8383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 839d24d4204SJose E. Roman } 840d24d4204SJose E. Roman 841d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 842d71ae5a4SJacob Faibussowitsch { 843d24d4204SJose E. Roman PetscFunctionBegin; 844d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 846d24d4204SJose E. Roman } 847d24d4204SJose E. Roman 848d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 849d71ae5a4SJacob Faibussowitsch { 850d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 851d24d4204SJose E. Roman PetscBLASInt one = 1, info; 852d24d4204SJose E. Roman 853d24d4204SJose E. Roman PetscFunctionBegin; 854792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 855d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info); 856d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 857d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 858d24d4204SJose E. Roman 8599566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8609566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 862d24d4204SJose E. Roman } 863d24d4204SJose E. Roman 864d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 865d71ae5a4SJacob Faibussowitsch { 866d24d4204SJose E. Roman PetscFunctionBegin; 8679566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8689566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 8693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 870d24d4204SJose E. Roman } 871d24d4204SJose E. Roman 872d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 873d71ae5a4SJacob Faibussowitsch { 874d24d4204SJose E. Roman PetscFunctionBegin; 875d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 877d24d4204SJose E. Roman } 878d24d4204SJose E. Roman 87966976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 880d71ae5a4SJacob Faibussowitsch { 881d24d4204SJose E. Roman PetscFunctionBegin; 882d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 884d24d4204SJose E. Roman } 885d24d4204SJose E. Roman 886d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 887d71ae5a4SJacob Faibussowitsch { 888d24d4204SJose E. Roman Mat B; 88959172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 890d24d4204SJose E. Roman 891d24d4204SJose E. Roman PetscFunctionBegin; 892d24d4204SJose E. Roman /* Create the factorization matrix */ 8939566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 89466e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 895d24d4204SJose E. Roman B->factortype = ftype; 8969566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8979566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 898d24d4204SJose E. Roman 8999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 900d24d4204SJose E. Roman *F = B; 9013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 902d24d4204SJose E. Roman } 903d24d4204SJose E. Roman 904d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 905d71ae5a4SJacob Faibussowitsch { 906d24d4204SJose E. Roman PetscFunctionBegin; 9079566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 9089566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 9093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 910d24d4204SJose E. Roman } 911d24d4204SJose E. Roman 912d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 913d71ae5a4SJacob Faibussowitsch { 914d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 915d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0; 916d24d4204SJose E. Roman const char *ntype; 917d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy; 918d24d4204SJose E. Roman 919d24d4204SJose E. Roman PetscFunctionBegin; 920d24d4204SJose E. Roman switch (type) { 921d24d4204SJose E. Roman case NORM_1: 922d24d4204SJose E. Roman ntype = "1"; 923d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 924d24d4204SJose E. Roman break; 925d24d4204SJose E. Roman case NORM_FROBENIUS: 926d24d4204SJose E. Roman ntype = "F"; 927d24d4204SJose E. Roman work = &dummy; 928d24d4204SJose E. Roman break; 929d24d4204SJose E. Roman case NORM_INFINITY: 930d24d4204SJose E. Roman ntype = "I"; 931d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 932d24d4204SJose E. Roman break; 933d71ae5a4SJacob Faibussowitsch default: 934d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 935d24d4204SJose E. Roman } 9369566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 937d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 9389566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 9393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 940d24d4204SJose E. Roman } 941d24d4204SJose E. Roman 942d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 943d71ae5a4SJacob Faibussowitsch { 944d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 945d24d4204SJose E. Roman 946d24d4204SJose E. Roman PetscFunctionBegin; 9479566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 9483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 949d24d4204SJose E. Roman } 950d24d4204SJose E. Roman 951d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 952d71ae5a4SJacob Faibussowitsch { 953d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 954d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx; 955d24d4204SJose E. Roman 956d24d4204SJose E. Roman PetscFunctionBegin; 957d24d4204SJose E. Roman if (rows) { 958d24d4204SJose E. Roman n = a->locr; 959d24d4204SJose E. Roman nb = a->mb; 960d24d4204SJose E. Roman isrc = a->rsrc; 961d24d4204SJose E. Roman nproc = a->grid->nprow; 962d24d4204SJose E. Roman iproc = a->grid->myrow; 9639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 964d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9659566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 966d24d4204SJose E. Roman } 967d24d4204SJose E. Roman if (cols) { 968d24d4204SJose E. Roman n = a->locc; 969d24d4204SJose E. Roman nb = a->nb; 970d24d4204SJose E. Roman isrc = a->csrc; 971d24d4204SJose E. Roman nproc = a->grid->npcol; 972d24d4204SJose E. Roman iproc = a->grid->mycol; 9739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 974d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9759566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 976d24d4204SJose E. Roman } 9773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 978d24d4204SJose E. Roman } 979d24d4204SJose E. Roman 980d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 981d71ae5a4SJacob Faibussowitsch { 982d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 983d24d4204SJose E. Roman Mat Bmpi; 984d24d4204SJose E. Roman MPI_Comm comm; 985a00b6df3SJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 9864b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork; 9874b1a79daSJose E. Roman const PetscScalar *vwork; 988d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 989d24d4204SJose E. Roman PetscScalar *barray; 9904b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE; 9914b1a79daSJose E. Roman PetscMPIInt size; 992d24d4204SJose E. Roman 993d24d4204SJose E. Roman PetscFunctionBegin; 9949566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9959566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 9964b1a79daSJose E. Roman 9974b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9999566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 10009371c9d4SSatish Balay for (i = 0; i < size; i++) 10019371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) { 10029371c9d4SSatish Balay differ = PETSC_TRUE; 10039371c9d4SSatish Balay break; 10049371c9d4SSatish Balay } 10054b1a79daSJose E. Roman } 10064b1a79daSJose E. Roman 10074b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 10089566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 10094b1a79daSJose E. Roman m = PETSC_DECIDE; 10109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 10114b1a79daSJose E. Roman n = PETSC_DECIDE; 10129566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10149566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 10159566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 10164b1a79daSJose E. Roman 10174b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1019a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 10206497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ 1021792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 10224b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info); 10234b1a79daSJose E. Roman 10244b1a79daSJose E. Roman /* redistribute matrix */ 10259566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1026792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10279566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 10289566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10299566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 10304b1a79daSJose E. Roman 10314b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 10329566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 10334b1a79daSJose E. Roman for (i = rstart; i < rend; i++) { 10349566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 10359566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 10369566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 10374b1a79daSJose E. Roman } 10389566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 10399566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 10409566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 10414b1a79daSJose E. Roman 10424b1a79daSJose E. Roman } else { /* normal cases */ 1043d24d4204SJose E. Roman 1044d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1045d24d4204SJose E. Roman else { 10469566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 104792c846b4SJose E. Roman m = PETSC_DECIDE; 10489566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 104992c846b4SJose E. Roman n = PETSC_DECIDE; 10509566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10529566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 10539566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1054d24d4204SJose E. Roman } 1055d24d4204SJose E. Roman 1056d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10579566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1058a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 10596497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ 1060792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1061d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1062d24d4204SJose E. Roman 1063d24d4204SJose E. Roman /* redistribute matrix */ 10649566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1065792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10669566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1067d24d4204SJose E. Roman 10689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1070d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10719566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1072d24d4204SJose E. Roman } else *B = Bmpi; 10734b1a79daSJose E. Roman } 10743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1075d24d4204SJose E. Roman } 1076d24d4204SJose E. Roman 1077d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1078d71ae5a4SJacob Faibussowitsch { 1079b12397e7SPierre Jolivet const PetscInt *ranges; 1080b12397e7SPierre Jolivet PetscMPIInt size; 1081b12397e7SPierre Jolivet PetscInt i, n; 1082b12397e7SPierre Jolivet 1083b12397e7SPierre Jolivet PetscFunctionBegin; 1084b12397e7SPierre Jolivet *correct = PETSC_TRUE; 1085b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1086b12397e7SPierre Jolivet if (size > 1) { 1087b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges)); 1088b12397e7SPierre Jolivet n = ranges[1] - ranges[0]; 10899371c9d4SSatish Balay for (i = 1; i < size; i++) 10909371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break; 1091b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1092b12397e7SPierre Jolivet } 10933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1094b12397e7SPierre Jolivet } 1095b12397e7SPierre Jolivet 1096d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1097d71ae5a4SJacob Faibussowitsch { 1098d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1099d24d4204SJose E. Roman Mat Bmpi; 1100d24d4204SJose E. Roman MPI_Comm comm; 110192c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1102b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols; 1103d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1104c87776d3SJose E. Roman const PetscScalar *aarray; 1105b12397e7SPierre Jolivet IS ir, ic; 11064e8b52a1SJose E. Roman PetscInt lda; 1107b12397e7SPierre Jolivet PetscBool flg; 1108d24d4204SJose E. Roman 1109d24d4204SJose E. Roman PetscFunctionBegin; 11109566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1111d24d4204SJose E. Roman 1112d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1113d24d4204SJose E. Roman else { 11149566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 111592c846b4SJose E. Roman m = PETSC_DECIDE; 11169566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 111792c846b4SJose E. Roman n = PETSC_DECIDE; 11189566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 11199566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 11209566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 11219566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1122d24d4204SJose E. Roman } 1123d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data; 1124d24d4204SJose E. Roman 1125b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 1126c87776d3SJose E. Roman PetscCall(MatDenseGetArrayRead(A, &aarray)); 1127b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1128b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1129b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1130d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 11319566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 11329566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 11336497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */ 1134792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1135d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1136d24d4204SJose E. Roman 1137d24d4204SJose E. Roman /* redistribute matrix */ 1138792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1139b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1140b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1141b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1142b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1143b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1144b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows)); 1145b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols)); 1146b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1147b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows)); 1148b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols)); 1149b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1150b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1151b12397e7SPierre Jolivet } 1152c87776d3SJose E. Roman PetscCall(MatDenseRestoreArrayRead(A, &aarray)); 11539566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 11549566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1155d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 11569566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1157d24d4204SJose E. Roman } else *B = Bmpi; 11583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1159d24d4204SJose E. Roman } 1160d24d4204SJose E. Roman 1161d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1162d71ae5a4SJacob Faibussowitsch { 1163d24d4204SJose E. Roman Mat mat_scal; 1164d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1165d24d4204SJose E. Roman const PetscInt *cols; 1166d24d4204SJose E. Roman const PetscScalar *vals; 1167d24d4204SJose E. Roman 1168d24d4204SJose E. Roman PetscFunctionBegin; 1169d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1170d24d4204SJose E. Roman mat_scal = *newmat; 11719566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1172d24d4204SJose E. Roman } else { 11739566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1174d24d4204SJose E. Roman m = PETSC_DECIDE; 11759566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1176d24d4204SJose E. Roman n = PETSC_DECIDE; 11779566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11789566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11799566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11809566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1181d24d4204SJose E. Roman } 1182d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11839566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11849566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 11859566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1186d24d4204SJose E. Roman } 11879566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11889566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1189d24d4204SJose E. Roman 11909566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1191d24d4204SJose E. Roman else *newmat = mat_scal; 11923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1193d24d4204SJose E. Roman } 1194d24d4204SJose E. Roman 1195d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1196d71ae5a4SJacob Faibussowitsch { 1197d24d4204SJose E. Roman Mat mat_scal; 1198d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1199d24d4204SJose E. Roman const PetscInt *cols; 1200d24d4204SJose E. Roman const PetscScalar *vals; 1201d24d4204SJose E. Roman PetscScalar v; 1202d24d4204SJose E. Roman 1203d24d4204SJose E. Roman PetscFunctionBegin; 1204d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1205d24d4204SJose E. Roman mat_scal = *newmat; 12069566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1207d24d4204SJose E. Roman } else { 12089566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1209d24d4204SJose E. Roman m = PETSC_DECIDE; 12109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1211d24d4204SJose E. Roman n = PETSC_DECIDE; 12129566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 12139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 12149566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 12159566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1216d24d4204SJose E. Roman } 12179566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1218d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 12199566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 12209566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1221d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */ 1222d24d4204SJose E. Roman if (cols[j] == row) continue; 1223b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 12249566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1225d24d4204SJose E. Roman } 12269566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1227d24d4204SJose E. Roman } 12289566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 12299566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 12309566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1231d24d4204SJose E. Roman 12329566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1233d24d4204SJose E. Roman else *newmat = mat_scal; 12343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1235d24d4204SJose E. Roman } 1236d24d4204SJose E. Roman 1237d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1238d71ae5a4SJacob Faibussowitsch { 1239d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1240d24d4204SJose E. Roman PetscInt sz = 0; 1241d24d4204SJose E. Roman 1242d24d4204SJose E. Roman PetscFunctionBegin; 12439566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12449566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1245d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1246d24d4204SJose E. Roman 12479566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12489566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 12499566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc)); 1250d24d4204SJose E. Roman 1251d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 12523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1253d24d4204SJose E. Roman } 1254d24d4204SJose E. Roman 1255d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1256d71ae5a4SJacob Faibussowitsch { 1257d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1258d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1259d24d4204SJose E. Roman PetscBool flg; 1260d24d4204SJose E. Roman MPI_Comm icomm; 1261d24d4204SJose E. Roman 1262d24d4204SJose E. Roman PetscFunctionBegin; 12639566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 12649566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12659566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 12669566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 12679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1268d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1269d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1270d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1271d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 12729566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 12739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1274d24d4204SJose E. Roman } 12759566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 12769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 12779566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 12789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 12799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 12809566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 12813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1282d24d4204SJose E. Roman } 1283d24d4204SJose E. Roman 128466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1285d71ae5a4SJacob Faibussowitsch { 1286d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1287d24d4204SJose E. Roman PetscBLASInt info = 0; 1288b12397e7SPierre Jolivet PetscBool flg; 1289d24d4204SJose E. Roman 1290d24d4204SJose E. Roman PetscFunctionBegin; 12919566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12929566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1293d24d4204SJose E. Roman 1294b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1295b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1296b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1297b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1298b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1299d24d4204SJose E. Roman 1300d24d4204SJose E. Roman /* compute local sizes */ 13019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 13029566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1303d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1304d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1305d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr); 1306d24d4204SJose E. Roman 1307d24d4204SJose E. Roman /* allocate local array */ 13089566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1309d24d4204SJose E. Roman 1310d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1311792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1312d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 13133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1314d24d4204SJose E. Roman } 1315d24d4204SJose E. Roman 131666976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1317d71ae5a4SJacob Faibussowitsch { 1318d24d4204SJose E. Roman PetscInt nstash, reallocs; 1319d24d4204SJose E. Roman 1320d24d4204SJose E. Roman PetscFunctionBegin; 13213ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 13229566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 13239566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 13249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 13253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1326d24d4204SJose E. Roman } 1327d24d4204SJose E. Roman 132866976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1329d71ae5a4SJacob Faibussowitsch { 1330d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1331d24d4204SJose E. Roman PetscMPIInt n; 1332d24d4204SJose E. Roman PetscInt i, flg, *row, *col; 1333d24d4204SJose E. Roman PetscScalar *val; 1334d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1335d24d4204SJose E. Roman 1336d24d4204SJose E. Roman PetscFunctionBegin; 13373ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1338d24d4204SJose E. Roman while (1) { 13399566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1340d24d4204SJose E. Roman if (!flg) break; 1341d24d4204SJose E. Roman for (i = 0; i < n; i++) { 13429566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 13439566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1344792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1345aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1346d24d4204SJose E. Roman switch (A->insertmode) { 1347d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 1348d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1349d71ae5a4SJacob Faibussowitsch break; 1350d71ae5a4SJacob Faibussowitsch case ADD_VALUES: 1351d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1352d71ae5a4SJacob Faibussowitsch break; 1353d71ae5a4SJacob Faibussowitsch default: 1354d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1355d24d4204SJose E. Roman } 1356d24d4204SJose E. Roman } 1357d24d4204SJose E. Roman } 13589566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 13593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1360d24d4204SJose E. Roman } 1361d24d4204SJose E. Roman 136266976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1363d71ae5a4SJacob Faibussowitsch { 1364d24d4204SJose E. Roman Mat Adense, As; 1365d24d4204SJose E. Roman MPI_Comm comm; 1366d24d4204SJose E. Roman 1367d24d4204SJose E. Roman PetscFunctionBegin; 13689566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 13699566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense)); 13709566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE)); 13719566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer)); 13729566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 13739566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 13749566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As)); 13753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1376d24d4204SJose E. Roman } 1377d24d4204SJose E. Roman 13789371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 13794cc2b5b5SPierre Jolivet NULL, 13804cc2b5b5SPierre Jolivet NULL, 1381d24d4204SJose E. Roman MatMult_ScaLAPACK, 1382d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1383d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1384d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1385d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1386d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 13874cc2b5b5SPierre Jolivet NULL, 13884cc2b5b5SPierre Jolivet /*10*/ NULL, 1389d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1390d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 13914cc2b5b5SPierre Jolivet NULL, 1392d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1393d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 13944cc2b5b5SPierre Jolivet NULL, 1395d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1396d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1397d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1398d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1399d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1400d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1401d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 14024cc2b5b5SPierre Jolivet /*24*/ NULL, 1403d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1404d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1405d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1406d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1407d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 14084cc2b5b5SPierre Jolivet NULL, 14094cc2b5b5SPierre Jolivet NULL, 14104cc2b5b5SPierre Jolivet NULL, 14114cc2b5b5SPierre Jolivet NULL, 1412d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 14134cc2b5b5SPierre Jolivet NULL, 14144cc2b5b5SPierre Jolivet NULL, 14154cc2b5b5SPierre Jolivet NULL, 14164cc2b5b5SPierre Jolivet NULL, 1417d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 14184cc2b5b5SPierre Jolivet NULL, 14194cc2b5b5SPierre Jolivet NULL, 14204cc2b5b5SPierre Jolivet NULL, 1421d24d4204SJose E. Roman MatCopy_ScaLAPACK, 14224cc2b5b5SPierre Jolivet /*44*/ NULL, 1423d24d4204SJose E. Roman MatScale_ScaLAPACK, 1424d24d4204SJose E. Roman MatShift_ScaLAPACK, 14254cc2b5b5SPierre Jolivet NULL, 14264cc2b5b5SPierre Jolivet NULL, 14274cc2b5b5SPierre Jolivet /*49*/ NULL, 14284cc2b5b5SPierre Jolivet NULL, 14294cc2b5b5SPierre Jolivet NULL, 14304cc2b5b5SPierre Jolivet NULL, 14314cc2b5b5SPierre Jolivet NULL, 14324cc2b5b5SPierre Jolivet /*54*/ NULL, 14334cc2b5b5SPierre Jolivet NULL, 14344cc2b5b5SPierre Jolivet NULL, 14354cc2b5b5SPierre Jolivet NULL, 14364cc2b5b5SPierre Jolivet NULL, 14374cc2b5b5SPierre Jolivet /*59*/ NULL, 1438d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1439d24d4204SJose E. Roman MatView_ScaLAPACK, 14404cc2b5b5SPierre Jolivet NULL, 14414cc2b5b5SPierre Jolivet NULL, 14424cc2b5b5SPierre Jolivet /*64*/ NULL, 14434cc2b5b5SPierre Jolivet NULL, 14444cc2b5b5SPierre Jolivet NULL, 14454cc2b5b5SPierre Jolivet NULL, 14464cc2b5b5SPierre Jolivet NULL, 14474cc2b5b5SPierre Jolivet /*69*/ NULL, 1448d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 14494cc2b5b5SPierre Jolivet NULL, 14504cc2b5b5SPierre Jolivet NULL, 1451*8bb0f5c6SPierre Jolivet NULL, 14524cc2b5b5SPierre Jolivet /*74*/ NULL, 14534cc2b5b5SPierre Jolivet NULL, 14544cc2b5b5SPierre Jolivet NULL, 14554cc2b5b5SPierre Jolivet NULL, 1456*8bb0f5c6SPierre Jolivet MatLoad_ScaLAPACK, 14574cc2b5b5SPierre Jolivet /*79*/ NULL, 14584cc2b5b5SPierre Jolivet NULL, 14594cc2b5b5SPierre Jolivet NULL, 14604cc2b5b5SPierre Jolivet NULL, 1461*8bb0f5c6SPierre Jolivet NULL, 14624cc2b5b5SPierre Jolivet /*84*/ NULL, 1463d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 14644cc2b5b5SPierre Jolivet NULL, 14654cc2b5b5SPierre Jolivet NULL, 1466d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1467*8bb0f5c6SPierre Jolivet /*89*/ NULL, 1468*8bb0f5c6SPierre Jolivet MatProductSetFromOptions_ScaLAPACK, 14694cc2b5b5SPierre Jolivet NULL, 14704cc2b5b5SPierre Jolivet NULL, 1471d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1472*8bb0f5c6SPierre Jolivet /*94*/ NULL, 14734cc2b5b5SPierre Jolivet NULL, 14744cc2b5b5SPierre Jolivet NULL, 14754cc2b5b5SPierre Jolivet NULL, 14764cc2b5b5SPierre Jolivet NULL, 1477*8bb0f5c6SPierre Jolivet /*99*/ NULL, 1478*8bb0f5c6SPierre Jolivet MatMatSolve_ScaLAPACK, 14794cc2b5b5SPierre Jolivet NULL, 14804cc2b5b5SPierre Jolivet NULL, 14814cc2b5b5SPierre Jolivet NULL, 1482*8bb0f5c6SPierre Jolivet /*104*/ MatMissingDiagonal_ScaLAPACK, 1483*8bb0f5c6SPierre Jolivet NULL, 1484*8bb0f5c6SPierre Jolivet NULL, 1485*8bb0f5c6SPierre Jolivet NULL, 1486*8bb0f5c6SPierre Jolivet NULL, 1487*8bb0f5c6SPierre Jolivet /*109*/ NULL, 1488*8bb0f5c6SPierre Jolivet NULL, 1489*8bb0f5c6SPierre Jolivet MatHermitianTranspose_ScaLAPACK, 1490*8bb0f5c6SPierre Jolivet MatMultHermitianTranspose_ScaLAPACK, 1491*8bb0f5c6SPierre Jolivet MatMultHermitianTransposeAdd_ScaLAPACK, 14924cc2b5b5SPierre Jolivet /*114*/ NULL, 14934cc2b5b5SPierre Jolivet NULL, 14944cc2b5b5SPierre Jolivet NULL, 14954cc2b5b5SPierre Jolivet NULL, 14964cc2b5b5SPierre Jolivet NULL, 14974cc2b5b5SPierre Jolivet /*119*/ NULL, 1498*8bb0f5c6SPierre Jolivet NULL, 1499*8bb0f5c6SPierre Jolivet NULL, 1500*8bb0f5c6SPierre Jolivet MatTransposeMatMultNumeric_ScaLAPACK, 15014cc2b5b5SPierre Jolivet NULL, 15024cc2b5b5SPierre Jolivet /*124*/ NULL, 15034cc2b5b5SPierre Jolivet NULL, 15044cc2b5b5SPierre Jolivet NULL, 15054cc2b5b5SPierre Jolivet NULL, 15064cc2b5b5SPierre Jolivet NULL, 15074cc2b5b5SPierre Jolivet /*129*/ NULL, 15084cc2b5b5SPierre Jolivet NULL, 15094cc2b5b5SPierre Jolivet NULL, 1510*8bb0f5c6SPierre Jolivet NULL, 15114cc2b5b5SPierre Jolivet NULL, 15124cc2b5b5SPierre Jolivet /*134*/ NULL, 15134cc2b5b5SPierre Jolivet NULL, 15144cc2b5b5SPierre Jolivet NULL, 15154cc2b5b5SPierre Jolivet NULL, 15164cc2b5b5SPierre Jolivet NULL, 15174cc2b5b5SPierre Jolivet NULL, 15184cc2b5b5SPierre Jolivet /*140*/ NULL, 15194cc2b5b5SPierre Jolivet NULL, 15204cc2b5b5SPierre Jolivet NULL}; 1521d24d4204SJose E. Roman 1522d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1523d71ae5a4SJacob Faibussowitsch { 15246497c311SBarry Smith PetscInt *owner, *startv, *starti, bs2; 1525d24d4204SJose E. Roman PetscInt size = stash->size, nsends; 15266497c311SBarry Smith PetscInt *sindices, **rindices, j, l; 1527d24d4204SJose E. Roman PetscScalar **rvalues, *svalues; 1528d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1529d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 15306497c311SBarry Smith PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii; 1531d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy; 1532d24d4204SJose E. Roman PetscScalar *sp_val; 1533d24d4204SJose E. Roman PetscMatStashSpace space, space_next; 1534d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1535d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1536d24d4204SJose E. Roman 1537d24d4204SJose E. Roman PetscFunctionBegin; 1538d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1539d24d4204SJose E. Roman InsertMode addv; 1540462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 154108401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1542d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1543d24d4204SJose E. Roman } 1544d24d4204SJose E. Roman 1545d24d4204SJose E. Roman bs2 = stash->bs * stash->bs; 1546d24d4204SJose E. Roman 1547d24d4204SJose E. Roman /* first count number of contributors to each processor */ 15489566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 15499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1550d24d4204SJose E. Roman 15516497c311SBarry Smith ii = j = 0; 1552d24d4204SJose E. Roman space = stash->space_head; 1553d24d4204SJose E. Roman while (space) { 1554d24d4204SJose E. Roman space_next = space->next; 1555d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 15569566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 15579566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1558792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1559d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 15609371c9d4SSatish Balay nlengths[j]++; 15616497c311SBarry Smith owner[ii] = j; 15626497c311SBarry Smith ii++; 1563d24d4204SJose E. Roman } 1564d24d4204SJose E. Roman space = space_next; 1565d24d4204SJose E. Roman } 1566d24d4204SJose E. Roman 1567d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 15689566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 15696497c311SBarry Smith nsends = 0; 15706497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 1571d24d4204SJose E. Roman if (nlengths[i]) { 15729371c9d4SSatish Balay sizes[i] = 1; 15739371c9d4SSatish Balay nsends++; 1574d24d4204SJose E. Roman } 1575d24d4204SJose E. Roman } 1576d24d4204SJose E. Roman 15779371c9d4SSatish Balay { 15789371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 15796497c311SBarry Smith 1580d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 15819566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 15826497c311SBarry Smith PetscCall(PetscMPIIntCast(nsends, &insends)); 15836497c311SBarry Smith PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths)); 1584d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 15856497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2; 15869566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1587d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 15886497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2); 15899566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 15909566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 15919371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 15929371c9d4SSatish Balay } 1593d24d4204SJose E. Roman 1594d24d4204SJose E. Roman /* do sends: 1595d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1596d24d4204SJose E. Roman the ith processor 1597d24d4204SJose E. Roman */ 15989566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 15999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 16009566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1601d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 16029371c9d4SSatish Balay startv[0] = 0; 16039371c9d4SSatish Balay starti[0] = 0; 16046497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) { 1605d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1]; 1606d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1607d24d4204SJose E. Roman } 1608d24d4204SJose E. Roman 16096497c311SBarry Smith ii = 0; 1610d24d4204SJose E. Roman space = stash->space_head; 1611d24d4204SJose E. Roman while (space) { 1612d24d4204SJose E. Roman space_next = space->next; 1613d24d4204SJose E. Roman sp_idx = space->idx; 1614d24d4204SJose E. Roman sp_idy = space->idy; 1615d24d4204SJose E. Roman sp_val = space->val; 1616d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 16176497c311SBarry Smith j = owner[ii]; 1618d24d4204SJose E. Roman if (bs2 == 1) { 1619d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1620d24d4204SJose E. Roman } else { 1621d24d4204SJose E. Roman PetscInt k; 1622d24d4204SJose E. Roman PetscScalar *buf1, *buf2; 1623d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j]; 1624d24d4204SJose E. Roman buf2 = space->val + bs2 * l; 1625d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1626d24d4204SJose E. Roman } 1627d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1628d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1629d24d4204SJose E. Roman startv[j]++; 1630d24d4204SJose E. Roman starti[j]++; 16316497c311SBarry Smith ii++; 1632d24d4204SJose E. Roman } 1633d24d4204SJose E. Roman space = space_next; 1634d24d4204SJose E. Roman } 1635d24d4204SJose E. Roman startv[0] = 0; 16366497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1637d24d4204SJose E. Roman 16386497c311SBarry Smith for (PetscMPIInt i = 0, count = 0; i < size; i++) { 1639d24d4204SJose E. Roman if (sizes[i]) { 16406497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 16416497c311SBarry Smith PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1642d24d4204SJose E. Roman } 1643d24d4204SJose E. Roman } 1644d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 16459566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 16466497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 16476497c311SBarry Smith if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1648d24d4204SJose E. Roman } 1649d24d4204SJose E. Roman #endif 16509566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 16519566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 16529566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 16539566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1654d24d4204SJose E. Roman 1655d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 16569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1657d24d4204SJose E. Roman 16586497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) { 1659d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i]; 1660d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i]; 1661d24d4204SJose E. Roman } 1662d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1663d24d4204SJose E. Roman 16649566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 16659566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1666d24d4204SJose E. Roman 1667d24d4204SJose E. Roman stash->svalues = svalues; 1668d24d4204SJose E. Roman stash->sindices = sindices; 1669d24d4204SJose E. Roman stash->rvalues = rvalues; 1670d24d4204SJose E. Roman stash->rindices = rindices; 1671d24d4204SJose E. Roman stash->send_waits = send_waits; 16726497c311SBarry Smith stash->nsends = (PetscMPIInt)nsends; 1673d24d4204SJose E. Roman stash->nrecvs = nreceives; 1674d24d4204SJose E. Roman stash->reproduce_count = 0; 16753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1676d24d4204SJose E. Roman } 1677d24d4204SJose E. Roman 1678d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1679d71ae5a4SJacob Faibussowitsch { 1680d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1681d24d4204SJose E. Roman 1682d24d4204SJose E. Roman PetscFunctionBegin; 168328b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1684aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1685aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 16869566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 16879566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 16883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1689d24d4204SJose E. Roman } 1690d24d4204SJose E. Roman 1691d24d4204SJose E. Roman /*@ 16926aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 169311a5261eSBarry Smith the `MATSCALAPACK` matrix 1694d24d4204SJose E. Roman 1695c3339decSBarry Smith Logically Collective 1696d24d4204SJose E. Roman 1697d8d19677SJose E. Roman Input Parameters: 169811a5261eSBarry Smith + A - a `MATSCALAPACK` matrix 1699d24d4204SJose E. Roman . mb - the row block size 1700d24d4204SJose E. Roman - nb - the column block size 1701d24d4204SJose E. Roman 1702d24d4204SJose E. Roman Level: intermediate 1703d24d4204SJose E. Roman 17042ef1f0ffSBarry Smith Note: 17052ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 17062ef1f0ffSBarry Smith 17071cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1708d24d4204SJose E. Roman @*/ 1709d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1710d71ae5a4SJacob Faibussowitsch { 1711d24d4204SJose E. Roman PetscFunctionBegin; 1712d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1713d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2); 1714d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3); 1715cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 17163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1717d24d4204SJose E. Roman } 1718d24d4204SJose E. Roman 1719d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1720d71ae5a4SJacob Faibussowitsch { 1721d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1722d24d4204SJose E. Roman 1723d24d4204SJose E. Roman PetscFunctionBegin; 1724d24d4204SJose E. Roman if (mb) *mb = a->mb; 1725d24d4204SJose E. Roman if (nb) *nb = a->nb; 17263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1727d24d4204SJose E. Roman } 1728d24d4204SJose E. Roman 1729d24d4204SJose E. Roman /*@ 17306aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 173111a5261eSBarry Smith the `MATSCALAPACK` matrix 1732d24d4204SJose E. Roman 17332ef1f0ffSBarry Smith Not Collective 1734d24d4204SJose E. Roman 1735d24d4204SJose E. Roman Input Parameter: 173611a5261eSBarry Smith . A - a `MATSCALAPACK` matrix 1737d24d4204SJose E. Roman 1738d24d4204SJose E. Roman Output Parameters: 1739d24d4204SJose E. Roman + mb - the row block size 1740d24d4204SJose E. Roman - nb - the column block size 1741d24d4204SJose E. Roman 1742d24d4204SJose E. Roman Level: intermediate 1743d24d4204SJose E. Roman 17442ef1f0ffSBarry Smith Note: 17452ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 17462ef1f0ffSBarry Smith 17471cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1748d24d4204SJose E. Roman @*/ 1749d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1750d71ae5a4SJacob Faibussowitsch { 1751d24d4204SJose E. Roman PetscFunctionBegin; 1752d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1753cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 17543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1755d24d4204SJose E. Roman } 1756d24d4204SJose E. Roman 1757d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1758d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1759d24d4204SJose E. Roman 1760d24d4204SJose E. Roman /*MC 1761d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1762d24d4204SJose E. Roman 17632ef1f0ffSBarry Smith Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1764d24d4204SJose E. Roman 1765d24d4204SJose E. Roman Options Database Keys: 17662ef1f0ffSBarry Smith + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 17672ef1f0ffSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1768d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1769d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1770d24d4204SJose E. Roman 17712ef1f0ffSBarry Smith Level: intermediate 17722ef1f0ffSBarry Smith 177389bba20eSBarry Smith Note: 177489bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 177589bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 177689bba20eSBarry Smith the given rank. 177789bba20eSBarry Smith 17781cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1779d24d4204SJose E. Roman M*/ 1780d24d4204SJose E. Roman 1781d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1782d71ae5a4SJacob Faibussowitsch { 1783d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1784d24d4204SJose E. Roman PetscBool flg, flg1; 1785d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1786d24d4204SJose E. Roman MPI_Comm icomm; 1787d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol; 1788d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0}; 1789d24d4204SJose E. Roman PetscMPIInt size; 1790d24d4204SJose E. Roman 1791d24d4204SJose E. Roman PetscFunctionBegin; 1792aea10558SJacob Faibussowitsch A->ops[0] = MatOps_Values; 1793d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1794d24d4204SJose E. Roman 17959566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1796d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1797d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1798d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1799d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1800d24d4204SJose E. Roman 18014dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1802d24d4204SJose E. Roman A->data = (void *)a; 1803d24d4204SJose E. Roman 1804d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1805d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1806c8025a54SPierre Jolivet PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL)); 18079566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 18089566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1809d24d4204SJose E. Roman } 18109566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 18119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1812d24d4204SJose E. Roman if (!flg) { 18134dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&grid)); 1814d24d4204SJose E. Roman 18159566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size)); 18166497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow)); 1817d24d4204SJose E. Roman 1818d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 18199566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1820d24d4204SJose E. Roman if (flg1) { 182108401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 18226497c311SBarry Smith PetscCall(PetscBLASIntCast(optv1, &grid->nprow)); 1823d24d4204SJose E. Roman } 1824d0609cedSBarry Smith PetscOptionsEnd(); 1825d24d4204SJose E. Roman 1826d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1827d24d4204SJose E. Roman grid->npcol = size / grid->nprow; 18289566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 18299566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1830f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1831d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1832d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1833d24d4204SJose E. Roman grid->grid_refct = 1; 1834d24d4204SJose E. Roman grid->nprow = nprow; 1835d24d4204SJose E. Roman grid->npcol = npcol; 1836d24d4204SJose E. Roman grid->myrow = myrow; 1837d24d4204SJose E. Roman grid->mycol = mycol; 1838d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1839f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1840d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1841f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1842d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 18439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1844d24d4204SJose E. Roman 1845d24d4204SJose E. Roman } else grid->grid_refct++; 18469566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1847d24d4204SJose E. Roman a->grid = grid; 1848d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1849d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1850d24d4204SJose E. Roman 1851d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 18529566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1853d24d4204SJose E. Roman if (flg) { 18546497c311SBarry Smith a->mb = (PetscMPIInt)array[0]; 18556497c311SBarry Smith a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb; 1856d24d4204SJose E. Roman } 1857d0609cedSBarry Smith PetscOptionsEnd(); 1858d24d4204SJose E. Roman 1859b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 18609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 18619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 18629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 18639566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 18643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1865d24d4204SJose E. Roman } 1866d24d4204SJose E. Roman 1867d24d4204SJose E. Roman /*@C 1868d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 186911a5261eSBarry Smith (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1870d24d4204SJose E. Roman 1871d24d4204SJose E. Roman Collective 1872d24d4204SJose E. Roman 1873d24d4204SJose E. Roman Input Parameters: 1874d24d4204SJose E. Roman + comm - MPI communicator 187511a5261eSBarry Smith . mb - row block size (or `PETSC_DECIDE` to have it set) 187611a5261eSBarry Smith . nb - column block size (or `PETSC_DECIDE` to have it set) 1877d24d4204SJose E. Roman . M - number of global rows 1878d24d4204SJose E. Roman . N - number of global columns 1879d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1880d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1881d24d4204SJose E. Roman 1882d24d4204SJose E. Roman Output Parameter: 1883d24d4204SJose E. Roman . A - the matrix 1884d24d4204SJose E. Roman 188511a5261eSBarry Smith Options Database Key: 1886d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1887d24d4204SJose E. Roman 18882ef1f0ffSBarry Smith Level: intermediate 18892ef1f0ffSBarry Smith 18902ef1f0ffSBarry Smith Notes: 18912ef1f0ffSBarry Smith If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 18922ef1f0ffSBarry Smith 189311a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1894d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 189511a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1896d24d4204SJose E. Roman 189746091a0eSPierre Jolivet Storage is completely managed by ScaLAPACK, so this requires PETSc to be 1898d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1899d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 190011a5261eSBarry Smith used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1901d24d4204SJose E. Roman 19021cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1903d24d4204SJose E. Roman @*/ 1904d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1905d71ae5a4SJacob Faibussowitsch { 1906d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1907d24d4204SJose E. Roman PetscInt m, n; 1908d24d4204SJose E. Roman 1909d24d4204SJose E. Roman PetscFunctionBegin; 19109566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 19119566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK)); 1912aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1913d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1914d24d4204SJose E. Roman m = PETSC_DECIDE; 19159566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1916d24d4204SJose E. Roman n = PETSC_DECIDE; 19179566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 19189566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1919d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data; 19209566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M)); 19219566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N)); 19229566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 19239566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 19249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 19259566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 19269566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 19273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1928d24d4204SJose E. Roman } 1929