1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23d71ae5a4SJacob Faibussowitsch { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31d71ae5a4SJacob Faibussowitsch { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39d24d4204SJose E. Roman if (iascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59d71ae5a4SJacob Faibussowitsch { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 7257168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 7657168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 844dfa11a4SJacob Faibussowitsch info->memory = 0; /* REVIEW ME */ 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92d71ae5a4SJacob Faibussowitsch { 93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94b12397e7SPierre Jolivet 95d24d4204SJose E. Roman PetscFunctionBegin; 96d24d4204SJose E. Roman switch (op) { 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 100d24d4204SJose E. Roman case MAT_SYMMETRIC: 101d24d4204SJose E. Roman case MAT_SORTED_FULL: 102d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 103d71ae5a4SJacob Faibussowitsch break; 104d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 105d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 106d71ae5a4SJacob Faibussowitsch break; 107d71ae5a4SJacob Faibussowitsch default: 108d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109d24d4204SJose E. Roman } 1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman 113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114d71ae5a4SJacob Faibussowitsch { 115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116d24d4204SJose E. Roman PetscInt i, j; 117d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 119d24d4204SJose E. Roman 120d24d4204SJose E. Roman PetscFunctionBegin; 121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122d24d4204SJose E. Roman for (i = 0; i < nr; i++) { 123d24d4204SJose E. Roman if (rows[i] < 0) continue; 1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125d24d4204SJose E. Roman for (j = 0; j < nc; j++) { 126d24d4204SJose E. Roman if (cols[j] < 0) continue; 1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130b12397e7SPierre Jolivet if (roworiented) { 131d24d4204SJose E. Roman switch (imode) { 132d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 133d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134d71ae5a4SJacob Faibussowitsch break; 135d71ae5a4SJacob Faibussowitsch default: 136d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137d71ae5a4SJacob Faibussowitsch break; 138b12397e7SPierre Jolivet } 139b12397e7SPierre Jolivet } else { 140b12397e7SPierre Jolivet switch (imode) { 141d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 142d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143d71ae5a4SJacob Faibussowitsch break; 144d71ae5a4SJacob Faibussowitsch default: 145d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146d71ae5a4SJacob Faibussowitsch break; 147b12397e7SPierre Jolivet } 148d24d4204SJose E. Roman } 149d24d4204SJose E. Roman } else { 15028b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 152b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153d24d4204SJose E. Roman } 154d24d4204SJose E. Roman } 155d24d4204SJose E. Roman } 1563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 157d24d4204SJose E. Roman } 158d24d4204SJose E. Roman 159*5e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160d71ae5a4SJacob Faibussowitsch { 161d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0; 163d24d4204SJose E. Roman const PetscInt *ranges; 164d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman PetscFunctionBegin; 167d24d4204SJose E. Roman if (transpose) { 168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 172792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1749566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176d24d4204SJose E. Roman ylld = 1; 177792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 179d24d4204SJose E. Roman 180d24d4204SJose E. Roman /* allocate 2d vectors */ 181d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 189792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 191d24d4204SJose E. Roman 192d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 193792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194d24d4204SJose E. Roman 195d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 196792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* call PBLAS subroutine */ 199*5e4d33a3SBlanca Mellado Pinto if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200*5e4d33a3SBlanca Mellado Pinto else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 201d24d4204SJose E. Roman 202d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 203792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 204d24d4204SJose E. Roman 205d24d4204SJose E. Roman } else { /* non-transpose */ 206d24d4204SJose E. Roman 207d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 2089566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 2099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 210d24d4204SJose E. Roman xlld = 1; 211792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 212d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 2139566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 2149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 215d24d4204SJose E. Roman ylld = PetscMax(1, A->rmap->n); 216792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 217d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 218d24d4204SJose E. Roman 219d24d4204SJose E. Roman /* allocate 2d vectors */ 220d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 221d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 2229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 223d24d4204SJose E. Roman ylld = PetscMax(1, lszy); 224d24d4204SJose E. Roman 225d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 226792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 227d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 228792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 229d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 230d24d4204SJose E. Roman 231d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 232792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 233d24d4204SJose E. Roman 234d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 235792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 236d24d4204SJose E. Roman 237d24d4204SJose E. Roman /* call PBLAS subroutine */ 238792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 239d24d4204SJose E. Roman 240d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 241792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 242d24d4204SJose E. Roman } 2439566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d)); 2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 245d24d4204SJose E. Roman } 246d24d4204SJose E. Roman 247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 248d71ae5a4SJacob Faibussowitsch { 249d24d4204SJose E. Roman const PetscScalar *xarray; 250d24d4204SJose E. Roman PetscScalar *yarray; 251d24d4204SJose E. Roman 252d24d4204SJose E. Roman PetscFunctionBegin; 2539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2549566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 255*5e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); 2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259d24d4204SJose E. Roman } 260d24d4204SJose E. Roman 261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 262d71ae5a4SJacob Faibussowitsch { 263d24d4204SJose E. Roman const PetscScalar *xarray; 264d24d4204SJose E. Roman PetscScalar *yarray; 265d24d4204SJose E. Roman 266d24d4204SJose E. Roman PetscFunctionBegin; 2679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 269*5e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 273d24d4204SJose E. Roman } 274d24d4204SJose E. Roman 275*5e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 276*5e4d33a3SBlanca Mellado Pinto { 277*5e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray; 278*5e4d33a3SBlanca Mellado Pinto PetscScalar *yarray; 279*5e4d33a3SBlanca Mellado Pinto 280*5e4d33a3SBlanca Mellado Pinto PetscFunctionBegin; 281*5e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray)); 282*5e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayWrite(y, &yarray)); 283*5e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); 284*5e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray)); 285*5e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayWrite(y, &yarray)); 286*5e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 287*5e4d33a3SBlanca Mellado Pinto } 288*5e4d33a3SBlanca Mellado Pinto 289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290d71ae5a4SJacob Faibussowitsch { 291d24d4204SJose E. Roman const PetscScalar *xarray; 292d24d4204SJose E. Roman PetscScalar *zarray; 293d24d4204SJose E. Roman 294d24d4204SJose E. Roman PetscFunctionBegin; 2959566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 298*5e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); 2999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 302d24d4204SJose E. Roman } 303d24d4204SJose E. Roman 304d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 305d71ae5a4SJacob Faibussowitsch { 306d24d4204SJose E. Roman const PetscScalar *xarray; 307d24d4204SJose E. Roman PetscScalar *zarray; 308d24d4204SJose E. Roman 309d24d4204SJose E. Roman PetscFunctionBegin; 3109566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 3119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 3129566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 313*5e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); 314*5e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray)); 315*5e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArray(z, &zarray)); 316*5e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 317*5e4d33a3SBlanca Mellado Pinto } 318*5e4d33a3SBlanca Mellado Pinto 319*5e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 320*5e4d33a3SBlanca Mellado Pinto { 321*5e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray; 322*5e4d33a3SBlanca Mellado Pinto PetscScalar *zarray; 323*5e4d33a3SBlanca Mellado Pinto 324*5e4d33a3SBlanca Mellado Pinto PetscFunctionBegin; 325*5e4d33a3SBlanca Mellado Pinto if (y != z) PetscCall(VecCopy(y, z)); 326*5e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray)); 327*5e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArray(z, &zarray)); 328*5e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); 3299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 332d24d4204SJose E. Roman } 333d24d4204SJose E. Roman 334d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 335d71ae5a4SJacob Faibussowitsch { 336d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 337d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 338d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 339d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 340d24d4204SJose E. Roman PetscBLASInt one = 1; 341d24d4204SJose E. Roman 342d24d4204SJose E. Roman PetscFunctionBegin; 343792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 344d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 346d24d4204SJose E. Roman } 347d24d4204SJose E. Roman 348d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 349d71ae5a4SJacob Faibussowitsch { 350d24d4204SJose E. Roman PetscFunctionBegin; 3519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3529566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3539566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 354d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 3553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 356d24d4204SJose E. Roman } 357d24d4204SJose E. Roman 358d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 359d71ae5a4SJacob Faibussowitsch { 360d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 361d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 362d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 363d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 364d24d4204SJose E. Roman PetscBLASInt one = 1; 365d24d4204SJose E. Roman 366d24d4204SJose E. Roman PetscFunctionBegin; 367792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 368d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 370d24d4204SJose E. Roman } 371d24d4204SJose E. Roman 372d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 373d71ae5a4SJacob Faibussowitsch { 374d24d4204SJose E. Roman PetscFunctionBegin; 3759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3769566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3779566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 3783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 379d24d4204SJose E. Roman } 380d24d4204SJose E. Roman 381d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 382d71ae5a4SJacob Faibussowitsch { 383d24d4204SJose E. Roman PetscFunctionBegin; 384d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 385d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 3863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 387d24d4204SJose E. Roman } 388d24d4204SJose E. Roman 389d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 390d71ae5a4SJacob Faibussowitsch { 391d24d4204SJose E. Roman PetscFunctionBegin; 392d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 393d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 3943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 395d24d4204SJose E. Roman } 396d24d4204SJose E. Roman 397d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 398d71ae5a4SJacob Faibussowitsch { 399d24d4204SJose E. Roman Mat_Product *product = C->product; 400d24d4204SJose E. Roman 401d24d4204SJose E. Roman PetscFunctionBegin; 402d24d4204SJose E. Roman switch (product->type) { 403d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 404d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 405d71ae5a4SJacob Faibussowitsch break; 406d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 407d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 408d71ae5a4SJacob Faibussowitsch break; 409d71ae5a4SJacob Faibussowitsch default: 410d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 411d24d4204SJose E. Roman } 4123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 413d24d4204SJose E. Roman } 414d24d4204SJose E. Roman 415d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 416d71ae5a4SJacob Faibussowitsch { 417d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 418d24d4204SJose E. Roman PetscScalar *darray, *d2d, v; 419d24d4204SJose E. Roman const PetscInt *ranges; 420d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 421d24d4204SJose E. Roman 422d24d4204SJose E. Roman PetscFunctionBegin; 4239566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray)); 424d24d4204SJose E. Roman 425d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */ 426d24d4204SJose E. Roman 427d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4289566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 4299566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 430d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 431792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 432d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 433d24d4204SJose E. Roman 434d24d4204SJose E. Roman /* allocate 2d vector */ 435d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 4369566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 437d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 438d24d4204SJose E. Roman 439d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 440792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 441d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 442d24d4204SJose E. Roman 443d24d4204SJose E. Roman /* collect diagonal */ 444d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) { 445792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 446792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 447d24d4204SJose E. Roman } 448d24d4204SJose E. Roman 449d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 450792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 4519566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 452d24d4204SJose E. Roman 453d24d4204SJose E. Roman } else { /* column version */ 454d24d4204SJose E. Roman 455d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4569566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4579566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 458d24d4204SJose E. Roman dlld = 1; 459792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 460d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 461d24d4204SJose E. Roman 462d24d4204SJose E. Roman /* allocate 2d vector */ 463d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4649566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 465d24d4204SJose E. Roman 466d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 467792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 468d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 469d24d4204SJose E. Roman 470d24d4204SJose E. Roman /* collect diagonal */ 471d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) { 472792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 473792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 474d24d4204SJose E. Roman } 475d24d4204SJose E. Roman 476d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 477792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 4789566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 479d24d4204SJose E. Roman } 480d24d4204SJose E. Roman 4819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray)); 4829566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4839566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 4843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 485d24d4204SJose E. Roman } 486d24d4204SJose E. Roman 487d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 488d71ae5a4SJacob Faibussowitsch { 489d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 490d24d4204SJose E. Roman const PetscScalar *d; 491d24d4204SJose E. Roman const PetscInt *ranges; 492d24d4204SJose E. Roman PetscScalar *d2d; 493d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 494d24d4204SJose E. Roman 495d24d4204SJose E. Roman PetscFunctionBegin; 496d24d4204SJose E. Roman if (R) { 4979566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 498d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4999566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 5009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 501d24d4204SJose E. Roman dlld = 1; 502792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 503d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 504d24d4204SJose E. Roman 505d24d4204SJose E. Roman /* allocate 2d vector */ 506d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 5079566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 508d24d4204SJose E. Roman 509d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 510792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 511d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 512d24d4204SJose E. Roman 513d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 514792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 515d24d4204SJose E. Roman 516d24d4204SJose E. Roman /* broadcast along process columns */ 517d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 518d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 519d24d4204SJose E. Roman 520d24d4204SJose E. Roman /* local scaling */ 5219371c9d4SSatish Balay for (j = 0; j < a->locc; j++) 5229371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 523d24d4204SJose E. Roman 5249566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 5259566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 526d24d4204SJose E. Roman } 527d24d4204SJose E. Roman if (L) { 5289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 529d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5309566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 5319566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 532d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 533792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 534d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 535d24d4204SJose E. Roman 536d24d4204SJose E. Roman /* allocate 2d vector */ 537d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 5389566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 539d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 540d24d4204SJose E. Roman 541d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 542792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 543d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 544d24d4204SJose E. Roman 545d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 546792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 547d24d4204SJose E. Roman 548d24d4204SJose E. Roman /* broadcast along process rows */ 549d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 550d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 551d24d4204SJose E. Roman 552d24d4204SJose E. Roman /* local scaling */ 5539371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 5549371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 555d24d4204SJose E. Roman 5569566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 5579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 558d24d4204SJose E. Roman } 5593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 560d24d4204SJose E. Roman } 561d24d4204SJose E. Roman 562d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 563d71ae5a4SJacob Faibussowitsch { 564d24d4204SJose E. Roman PetscFunctionBegin; 565d24d4204SJose E. Roman *missing = PETSC_FALSE; 5663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 567d24d4204SJose E. Roman } 568d24d4204SJose E. Roman 569d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 570d71ae5a4SJacob Faibussowitsch { 571d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 572d24d4204SJose E. Roman PetscBLASInt n, one = 1; 573d24d4204SJose E. Roman 574d24d4204SJose E. Roman PetscFunctionBegin; 575d24d4204SJose E. Roman n = x->lld * x->locc; 576792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 5773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 578d24d4204SJose E. Roman } 579d24d4204SJose E. Roman 580d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 581d71ae5a4SJacob Faibussowitsch { 582d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 583d24d4204SJose E. Roman PetscBLASInt i, n; 584d24d4204SJose E. Roman PetscScalar v; 585d24d4204SJose E. Roman 586d24d4204SJose E. Roman PetscFunctionBegin; 587d24d4204SJose E. Roman n = PetscMin(x->M, x->N); 588d24d4204SJose E. Roman for (i = 1; i <= n; i++) { 589792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 590d24d4204SJose E. Roman v += alpha; 591792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 592d24d4204SJose E. Roman } 5933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 594d24d4204SJose E. Roman } 595d24d4204SJose E. Roman 596d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 597d71ae5a4SJacob Faibussowitsch { 598d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 599d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 600d24d4204SJose E. Roman PetscBLASInt one = 1; 601d24d4204SJose E. Roman PetscScalar beta = 1.0; 602d24d4204SJose E. Roman 603d24d4204SJose E. Roman PetscFunctionBegin; 604d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3); 605792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 6069566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 6073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 608d24d4204SJose E. Roman } 609d24d4204SJose E. Roman 610d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 611d71ae5a4SJacob Faibussowitsch { 612d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 613d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 614d24d4204SJose E. Roman 615d24d4204SJose E. Roman PetscFunctionBegin; 6169566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 6179566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 6183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 619d24d4204SJose E. Roman } 620d24d4204SJose E. Roman 621d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 622d71ae5a4SJacob Faibussowitsch { 623d24d4204SJose E. Roman Mat Bs; 624d24d4204SJose E. Roman MPI_Comm comm; 625d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 626d24d4204SJose E. Roman 627d24d4204SJose E. Roman PetscFunctionBegin; 6289566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 6299566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs)); 6309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 6319566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK)); 632d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 633d24d4204SJose E. Roman b->M = a->M; 634d24d4204SJose E. Roman b->N = a->N; 635d24d4204SJose E. Roman b->mb = a->mb; 636d24d4204SJose E. Roman b->nb = a->nb; 637d24d4204SJose E. Roman b->rsrc = a->rsrc; 638d24d4204SJose E. Roman b->csrc = a->csrc; 6399566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 640d24d4204SJose E. Roman *B = Bs; 64148a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 642d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 644d24d4204SJose E. Roman } 645d24d4204SJose E. Roman 646d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 647d71ae5a4SJacob Faibussowitsch { 648d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 649d24d4204SJose E. Roman Mat Bs = *B; 650d24d4204SJose E. Roman PetscBLASInt one = 1; 651d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 652d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 653d24d4204SJose E. Roman PetscInt i, j; 654d24d4204SJose E. Roman #endif 655d24d4204SJose E. Roman 656d24d4204SJose E. Roman PetscFunctionBegin; 6577fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 6580fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 6599566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 660d24d4204SJose E. Roman *B = Bs; 661d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 662792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 663d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 664d24d4204SJose E. Roman /* undo conjugation */ 6659371c9d4SSatish Balay for (i = 0; i < b->locr; i++) 6669371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 667d24d4204SJose E. Roman #endif 668d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 670d24d4204SJose E. Roman } 671d24d4204SJose E. Roman 672d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 673d71ae5a4SJacob Faibussowitsch { 674d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 675d24d4204SJose E. Roman PetscInt i, j; 676d24d4204SJose E. Roman 677d24d4204SJose E. Roman PetscFunctionBegin; 6789371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 6799371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 6803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 681d24d4204SJose E. Roman } 682d24d4204SJose E. Roman 683d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 684d71ae5a4SJacob Faibussowitsch { 685d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 686d24d4204SJose E. Roman Mat Bs = *B; 687d24d4204SJose E. Roman PetscBLASInt one = 1; 688d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 689d24d4204SJose E. Roman 690d24d4204SJose E. Roman PetscFunctionBegin; 6910fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 6929566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 693d24d4204SJose E. Roman *B = Bs; 694d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 695792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 696d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 698d24d4204SJose E. Roman } 699d24d4204SJose E. Roman 700d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 701d71ae5a4SJacob Faibussowitsch { 702d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 703d24d4204SJose E. Roman PetscScalar *x, *x2d; 704d24d4204SJose E. Roman const PetscInt *ranges; 705d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 706d24d4204SJose E. Roman 707d24d4204SJose E. Roman PetscFunctionBegin; 7089566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X)); 7099566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 710d24d4204SJose E. Roman 711d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 7129566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 7139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 714d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 715792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 716d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 717d24d4204SJose E. Roman 718d24d4204SJose E. Roman /* allocate 2d vector */ 719d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 7209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d)); 721d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 722d24d4204SJose E. Roman 723d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 724792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 725d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 726d24d4204SJose E. Roman 727d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 728792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 729d24d4204SJose E. Roman 730d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 731d24d4204SJose E. Roman switch (A->factortype) { 732d24d4204SJose E. Roman case MAT_FACTOR_LU: 733792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 734d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 735d24d4204SJose E. Roman break; 736d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 737792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 738d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 739d24d4204SJose E. Roman break; 740d71ae5a4SJacob Faibussowitsch default: 741d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 742d24d4204SJose E. Roman } 743d24d4204SJose E. Roman 744d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 745792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 746d24d4204SJose E. Roman 7479566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 7489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 7493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 750d24d4204SJose E. Roman } 751d24d4204SJose E. Roman 752d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 753d71ae5a4SJacob Faibussowitsch { 754d24d4204SJose E. Roman PetscFunctionBegin; 7559566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X)); 7569566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y)); 7573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 758d24d4204SJose E. Roman } 759d24d4204SJose E. Roman 760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 761d71ae5a4SJacob Faibussowitsch { 762d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 763d24d4204SJose E. Roman PetscBool flg1, flg2; 764d24d4204SJose E. Roman PetscBLASInt one = 1, info; 765d24d4204SJose E. Roman 766d24d4204SJose E. Roman PetscFunctionBegin; 7679566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 7689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 76908401ef6SPierre Jolivet PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 770d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B, 1, X, 2); 771d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)B->data; 772d24d4204SJose E. Roman x = (Mat_ScaLAPACK *)X->data; 7739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 774d24d4204SJose E. Roman 775d24d4204SJose E. Roman switch (A->factortype) { 776d24d4204SJose E. Roman case MAT_FACTOR_LU: 777792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 778d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 779d24d4204SJose E. Roman break; 780d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 781792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 782d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 783d24d4204SJose E. Roman break; 784d71ae5a4SJacob Faibussowitsch default: 785d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 786d24d4204SJose E. Roman } 7873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 788d24d4204SJose E. Roman } 789d24d4204SJose E. Roman 790d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 791d71ae5a4SJacob Faibussowitsch { 792d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 793d24d4204SJose E. Roman PetscBLASInt one = 1, info; 794d24d4204SJose E. Roman 795d24d4204SJose E. Roman PetscFunctionBegin; 796aa624791SPierre Jolivet if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 797792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 798d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info); 799d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 800d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 801d24d4204SJose E. Roman 8029566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8039566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 805d24d4204SJose E. Roman } 806d24d4204SJose E. Roman 807d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 808d71ae5a4SJacob Faibussowitsch { 809d24d4204SJose E. Roman PetscFunctionBegin; 8109566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8119566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 8123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 813d24d4204SJose E. Roman } 814d24d4204SJose E. Roman 815d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 816d71ae5a4SJacob Faibussowitsch { 817d24d4204SJose E. Roman PetscFunctionBegin; 818d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 820d24d4204SJose E. Roman } 821d24d4204SJose E. Roman 822d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 823d71ae5a4SJacob Faibussowitsch { 824d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 825d24d4204SJose E. Roman PetscBLASInt one = 1, info; 826d24d4204SJose E. Roman 827d24d4204SJose E. Roman PetscFunctionBegin; 828792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 829d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info); 830d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 831d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 832d24d4204SJose E. Roman 8339566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8349566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 836d24d4204SJose E. Roman } 837d24d4204SJose E. Roman 838d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 839d71ae5a4SJacob Faibussowitsch { 840d24d4204SJose E. Roman PetscFunctionBegin; 8419566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8429566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 8433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 844d24d4204SJose E. Roman } 845d24d4204SJose E. Roman 846d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 847d71ae5a4SJacob Faibussowitsch { 848d24d4204SJose E. Roman PetscFunctionBegin; 849d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 851d24d4204SJose E. Roman } 852d24d4204SJose E. Roman 85366976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 854d71ae5a4SJacob Faibussowitsch { 855d24d4204SJose E. Roman PetscFunctionBegin; 856d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 8573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 858d24d4204SJose E. Roman } 859d24d4204SJose E. Roman 860d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 861d71ae5a4SJacob Faibussowitsch { 862d24d4204SJose E. Roman Mat B; 86359172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 864d24d4204SJose E. Roman 865d24d4204SJose E. Roman PetscFunctionBegin; 866d24d4204SJose E. Roman /* Create the factorization matrix */ 8679566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 86866e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 869d24d4204SJose E. Roman B->factortype = ftype; 8709566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8719566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 872d24d4204SJose E. Roman 8739566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 874d24d4204SJose E. Roman *F = B; 8753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 876d24d4204SJose E. Roman } 877d24d4204SJose E. Roman 878d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 879d71ae5a4SJacob Faibussowitsch { 880d24d4204SJose E. Roman PetscFunctionBegin; 8819566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 8829566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 884d24d4204SJose E. Roman } 885d24d4204SJose E. Roman 886d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 887d71ae5a4SJacob Faibussowitsch { 888d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 889d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0; 890d24d4204SJose E. Roman const char *ntype; 891d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy; 892d24d4204SJose E. Roman 893d24d4204SJose E. Roman PetscFunctionBegin; 894d24d4204SJose E. Roman switch (type) { 895d24d4204SJose E. Roman case NORM_1: 896d24d4204SJose E. Roman ntype = "1"; 897d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 898d24d4204SJose E. Roman break; 899d24d4204SJose E. Roman case NORM_FROBENIUS: 900d24d4204SJose E. Roman ntype = "F"; 901d24d4204SJose E. Roman work = &dummy; 902d24d4204SJose E. Roman break; 903d24d4204SJose E. Roman case NORM_INFINITY: 904d24d4204SJose E. Roman ntype = "I"; 905d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 906d24d4204SJose E. Roman break; 907d71ae5a4SJacob Faibussowitsch default: 908d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 909d24d4204SJose E. Roman } 9109566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 911d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 9129566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 9133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 914d24d4204SJose E. Roman } 915d24d4204SJose E. Roman 916d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 917d71ae5a4SJacob Faibussowitsch { 918d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 919d24d4204SJose E. Roman 920d24d4204SJose E. Roman PetscFunctionBegin; 9219566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 9223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 923d24d4204SJose E. Roman } 924d24d4204SJose E. Roman 925d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 926d71ae5a4SJacob Faibussowitsch { 927d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 928d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx; 929d24d4204SJose E. Roman 930d24d4204SJose E. Roman PetscFunctionBegin; 931d24d4204SJose E. Roman if (rows) { 932d24d4204SJose E. Roman n = a->locr; 933d24d4204SJose E. Roman nb = a->mb; 934d24d4204SJose E. Roman isrc = a->rsrc; 935d24d4204SJose E. Roman nproc = a->grid->nprow; 936d24d4204SJose E. Roman iproc = a->grid->myrow; 9379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 938d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9399566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 940d24d4204SJose E. Roman } 941d24d4204SJose E. Roman if (cols) { 942d24d4204SJose E. Roman n = a->locc; 943d24d4204SJose E. Roman nb = a->nb; 944d24d4204SJose E. Roman isrc = a->csrc; 945d24d4204SJose E. Roman nproc = a->grid->npcol; 946d24d4204SJose E. Roman iproc = a->grid->mycol; 9479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 948d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9499566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 950d24d4204SJose E. Roman } 9513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 952d24d4204SJose E. Roman } 953d24d4204SJose E. Roman 954d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 955d71ae5a4SJacob Faibussowitsch { 956d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 957d24d4204SJose E. Roman Mat Bmpi; 958d24d4204SJose E. Roman MPI_Comm comm; 959a00b6df3SJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 9604b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork; 9614b1a79daSJose E. Roman const PetscScalar *vwork; 962d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 963d24d4204SJose E. Roman PetscScalar *barray; 9644b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE; 9654b1a79daSJose E. Roman PetscMPIInt size; 966d24d4204SJose E. Roman 967d24d4204SJose E. Roman PetscFunctionBegin; 9689566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 9704b1a79daSJose E. Roman 9714b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9739566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 9749371c9d4SSatish Balay for (i = 0; i < size; i++) 9759371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) { 9769371c9d4SSatish Balay differ = PETSC_TRUE; 9779371c9d4SSatish Balay break; 9789371c9d4SSatish Balay } 9794b1a79daSJose E. Roman } 9804b1a79daSJose E. Roman 9814b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9829566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 9834b1a79daSJose E. Roman m = PETSC_DECIDE; 9849566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 9854b1a79daSJose E. Roman n = PETSC_DECIDE; 9869566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9879566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9889566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 9899566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 9904b1a79daSJose E. Roman 9914b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9929566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 993a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 994a00b6df3SJose E. Roman lld = PetscMax(ldb, 1); /* local leading dimension */ 995792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 9964b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info); 9974b1a79daSJose E. Roman 9984b1a79daSJose E. Roman /* redistribute matrix */ 9999566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1000792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10019566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 10029566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10039566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 10044b1a79daSJose E. Roman 10054b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 10069566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 10074b1a79daSJose E. Roman for (i = rstart; i < rend; i++) { 10089566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 10099566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 10109566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 10114b1a79daSJose E. Roman } 10129566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 10139566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 10149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 10154b1a79daSJose E. Roman 10164b1a79daSJose E. Roman } else { /* normal cases */ 1017d24d4204SJose E. Roman 1018d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1019d24d4204SJose E. Roman else { 10209566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 102192c846b4SJose E. Roman m = PETSC_DECIDE; 10229566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 102392c846b4SJose E. Roman n = PETSC_DECIDE; 10249566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10259566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10269566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 10279566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1028d24d4204SJose E. Roman } 1029d24d4204SJose E. Roman 1030d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10319566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1032a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 1033a00b6df3SJose E. Roman lld = PetscMax(ldb, 1); /* local leading dimension */ 1034792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1035d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1036d24d4204SJose E. Roman 1037d24d4204SJose E. Roman /* redistribute matrix */ 10389566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1039792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10409566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1041d24d4204SJose E. Roman 10429566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10439566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1044d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10459566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1046d24d4204SJose E. Roman } else *B = Bmpi; 10474b1a79daSJose E. Roman } 10483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1049d24d4204SJose E. Roman } 1050d24d4204SJose E. Roman 1051d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1052d71ae5a4SJacob Faibussowitsch { 1053b12397e7SPierre Jolivet const PetscInt *ranges; 1054b12397e7SPierre Jolivet PetscMPIInt size; 1055b12397e7SPierre Jolivet PetscInt i, n; 1056b12397e7SPierre Jolivet 1057b12397e7SPierre Jolivet PetscFunctionBegin; 1058b12397e7SPierre Jolivet *correct = PETSC_TRUE; 1059b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1060b12397e7SPierre Jolivet if (size > 1) { 1061b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges)); 1062b12397e7SPierre Jolivet n = ranges[1] - ranges[0]; 10639371c9d4SSatish Balay for (i = 1; i < size; i++) 10649371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break; 1065b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1066b12397e7SPierre Jolivet } 10673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1068b12397e7SPierre Jolivet } 1069b12397e7SPierre Jolivet 1070d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1071d71ae5a4SJacob Faibussowitsch { 1072d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1073d24d4204SJose E. Roman Mat Bmpi; 1074d24d4204SJose E. Roman MPI_Comm comm; 107592c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1076b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols; 1077d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1078d24d4204SJose E. Roman PetscScalar *aarray; 1079b12397e7SPierre Jolivet IS ir, ic; 10804e8b52a1SJose E. Roman PetscInt lda; 1081b12397e7SPierre Jolivet PetscBool flg; 1082d24d4204SJose E. Roman 1083d24d4204SJose E. Roman PetscFunctionBegin; 10849566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1085d24d4204SJose E. Roman 1086d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1087d24d4204SJose E. Roman else { 10889566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 108992c846b4SJose E. Roman m = PETSC_DECIDE; 10909566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 109192c846b4SJose E. Roman n = PETSC_DECIDE; 10929566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10939566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10949566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 10959566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1096d24d4204SJose E. Roman } 1097d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data; 1098d24d4204SJose E. Roman 1099b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 1100b12397e7SPierre Jolivet PetscCall(MatDenseGetArray(A, &aarray)); 1101b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1102b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1103b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1104d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 11059566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 11069566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 11074e8b52a1SJose E. Roman lld = PetscMax(lda, 1); /* local leading dimension */ 1108792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1109d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1110d24d4204SJose E. Roman 1111d24d4204SJose E. Roman /* redistribute matrix */ 1112792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1113b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1114b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1115b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1116b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1117b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1118b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows)); 1119b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols)); 1120b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1121b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows)); 1122b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols)); 1123b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1124b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1125b12397e7SPierre Jolivet } 11269566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A, &aarray)); 11279566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 11289566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1129d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 11309566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1131d24d4204SJose E. Roman } else *B = Bmpi; 11323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1133d24d4204SJose E. Roman } 1134d24d4204SJose E. Roman 1135d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1136d71ae5a4SJacob Faibussowitsch { 1137d24d4204SJose E. Roman Mat mat_scal; 1138d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1139d24d4204SJose E. Roman const PetscInt *cols; 1140d24d4204SJose E. Roman const PetscScalar *vals; 1141d24d4204SJose E. Roman 1142d24d4204SJose E. Roman PetscFunctionBegin; 1143d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1144d24d4204SJose E. Roman mat_scal = *newmat; 11459566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1146d24d4204SJose E. Roman } else { 11479566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1148d24d4204SJose E. Roman m = PETSC_DECIDE; 11499566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1150d24d4204SJose E. Roman n = PETSC_DECIDE; 11519566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11529566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11539566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11549566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1155d24d4204SJose E. Roman } 1156d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11579566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11589566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 11599566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1160d24d4204SJose E. Roman } 11619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1163d24d4204SJose E. Roman 11649566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1165d24d4204SJose E. Roman else *newmat = mat_scal; 11663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1167d24d4204SJose E. Roman } 1168d24d4204SJose E. Roman 1169d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1170d71ae5a4SJacob Faibussowitsch { 1171d24d4204SJose E. Roman Mat mat_scal; 1172d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1173d24d4204SJose E. Roman const PetscInt *cols; 1174d24d4204SJose E. Roman const PetscScalar *vals; 1175d24d4204SJose E. Roman PetscScalar v; 1176d24d4204SJose E. Roman 1177d24d4204SJose E. Roman PetscFunctionBegin; 1178d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1179d24d4204SJose E. Roman mat_scal = *newmat; 11809566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1181d24d4204SJose E. Roman } else { 11829566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1183d24d4204SJose E. Roman m = PETSC_DECIDE; 11849566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1185d24d4204SJose E. Roman n = PETSC_DECIDE; 11869566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11879566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11889566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11899566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1190d24d4204SJose E. Roman } 11919566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1192d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11939566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11949566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1195d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */ 1196d24d4204SJose E. Roman if (cols[j] == row) continue; 1197b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 11989566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1199d24d4204SJose E. Roman } 12009566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1201d24d4204SJose E. Roman } 12029566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 12039566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 12049566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1205d24d4204SJose E. Roman 12069566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1207d24d4204SJose E. Roman else *newmat = mat_scal; 12083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1209d24d4204SJose E. Roman } 1210d24d4204SJose E. Roman 1211d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1212d71ae5a4SJacob Faibussowitsch { 1213d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1214d24d4204SJose E. Roman PetscInt sz = 0; 1215d24d4204SJose E. Roman 1216d24d4204SJose E. Roman PetscFunctionBegin; 12179566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12189566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1219d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1220d24d4204SJose E. Roman 12219566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12229566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 12239566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc)); 1224d24d4204SJose E. Roman 1225d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 12263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1227d24d4204SJose E. Roman } 1228d24d4204SJose E. Roman 1229d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1230d71ae5a4SJacob Faibussowitsch { 1231d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1232d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1233d24d4204SJose E. Roman PetscBool flg; 1234d24d4204SJose E. Roman MPI_Comm icomm; 1235d24d4204SJose E. Roman 1236d24d4204SJose E. Roman PetscFunctionBegin; 12379566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 12389566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12399566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 12409566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 12419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1242d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1243d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1244d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1245d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 12469566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 12479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1248d24d4204SJose E. Roman } 12499566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 12509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 12519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 12529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 12539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 12549566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 12553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1256d24d4204SJose E. Roman } 1257d24d4204SJose E. Roman 125866976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1259d71ae5a4SJacob Faibussowitsch { 1260d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1261d24d4204SJose E. Roman PetscBLASInt info = 0; 1262b12397e7SPierre Jolivet PetscBool flg; 1263d24d4204SJose E. Roman 1264d24d4204SJose E. Roman PetscFunctionBegin; 12659566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12669566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1267d24d4204SJose E. Roman 1268b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1269b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1270b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1271b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1272b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1273d24d4204SJose E. Roman 1274d24d4204SJose E. Roman /* compute local sizes */ 12759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 12769566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1277d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1278d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1279d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr); 1280d24d4204SJose E. Roman 1281d24d4204SJose E. Roman /* allocate local array */ 12829566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1283d24d4204SJose E. Roman 1284d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1285792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1286d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 12873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1288d24d4204SJose E. Roman } 1289d24d4204SJose E. Roman 129066976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1291d71ae5a4SJacob Faibussowitsch { 1292d24d4204SJose E. Roman PetscInt nstash, reallocs; 1293d24d4204SJose E. Roman 1294d24d4204SJose E. Roman PetscFunctionBegin; 12953ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 12969566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 12979566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 12989566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 12993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1300d24d4204SJose E. Roman } 1301d24d4204SJose E. Roman 130266976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1303d71ae5a4SJacob Faibussowitsch { 1304d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1305d24d4204SJose E. Roman PetscMPIInt n; 1306d24d4204SJose E. Roman PetscInt i, flg, *row, *col; 1307d24d4204SJose E. Roman PetscScalar *val; 1308d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1309d24d4204SJose E. Roman 1310d24d4204SJose E. Roman PetscFunctionBegin; 13113ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1312d24d4204SJose E. Roman while (1) { 13139566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1314d24d4204SJose E. Roman if (!flg) break; 1315d24d4204SJose E. Roman for (i = 0; i < n; i++) { 13169566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 13179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1318792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1319aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1320d24d4204SJose E. Roman switch (A->insertmode) { 1321d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 1322d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1323d71ae5a4SJacob Faibussowitsch break; 1324d71ae5a4SJacob Faibussowitsch case ADD_VALUES: 1325d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1326d71ae5a4SJacob Faibussowitsch break; 1327d71ae5a4SJacob Faibussowitsch default: 1328d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1329d24d4204SJose E. Roman } 1330d24d4204SJose E. Roman } 1331d24d4204SJose E. Roman } 13329566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 13333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1334d24d4204SJose E. Roman } 1335d24d4204SJose E. Roman 133666976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1337d71ae5a4SJacob Faibussowitsch { 1338d24d4204SJose E. Roman Mat Adense, As; 1339d24d4204SJose E. Roman MPI_Comm comm; 1340d24d4204SJose E. Roman 1341d24d4204SJose E. Roman PetscFunctionBegin; 13429566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 13439566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense)); 13449566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE)); 13459566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer)); 13469566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 13479566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 13489566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As)); 13493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1350d24d4204SJose E. Roman } 1351d24d4204SJose E. Roman 13529371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman 0, 1355d24d4204SJose E. Roman MatMult_ScaLAPACK, 1356d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1357d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1358d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1359d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1360d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman /*10*/ 0, 1363d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1364d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1365d24d4204SJose E. Roman 0, 1366d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1367d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1368d24d4204SJose E. Roman 0, 1369d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1370d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1371d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1372d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1373d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1374d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1375d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1376d24d4204SJose E. Roman /*24*/ 0, 1377d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1378d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1379d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1380d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1381d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman 0, 1385d24d4204SJose E. Roman 0, 1386d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman 0, 1390d24d4204SJose E. Roman 0, 1391d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman 0, 1395d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1396d24d4204SJose E. Roman /*44*/ 0, 1397d24d4204SJose E. Roman MatScale_ScaLAPACK, 1398d24d4204SJose E. Roman MatShift_ScaLAPACK, 1399d24d4204SJose E. Roman 0, 1400d24d4204SJose E. Roman 0, 1401d24d4204SJose E. Roman /*49*/ 0, 1402d24d4204SJose E. Roman 0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman 0, 1405d24d4204SJose E. Roman 0, 1406d24d4204SJose E. Roman /*54*/ 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman 0, 1409d24d4204SJose E. Roman 0, 1410d24d4204SJose E. Roman 0, 1411d24d4204SJose E. Roman /*59*/ 0, 1412d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1413d24d4204SJose E. Roman MatView_ScaLAPACK, 1414d24d4204SJose E. Roman 0, 1415d24d4204SJose E. Roman 0, 1416d24d4204SJose E. Roman /*64*/ 0, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman 0, 1420d24d4204SJose E. Roman 0, 1421d24d4204SJose E. Roman /*69*/ 0, 1422d24d4204SJose E. Roman 0, 1423d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1424d24d4204SJose E. Roman 0, 1425d24d4204SJose E. Roman 0, 1426d24d4204SJose E. Roman /*74*/ 0, 1427d24d4204SJose E. Roman 0, 1428d24d4204SJose E. Roman 0, 1429d24d4204SJose E. Roman 0, 1430d24d4204SJose E. Roman 0, 1431d24d4204SJose E. Roman /*79*/ 0, 1432d24d4204SJose E. Roman 0, 1433d24d4204SJose E. Roman 0, 1434d24d4204SJose E. Roman 0, 1435d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1436d24d4204SJose E. Roman /*84*/ 0, 1437d24d4204SJose E. Roman 0, 1438d24d4204SJose E. Roman 0, 1439d24d4204SJose E. Roman 0, 1440d24d4204SJose E. Roman 0, 1441d24d4204SJose E. Roman /*89*/ 0, 1442d24d4204SJose E. Roman 0, 1443d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1444d24d4204SJose E. Roman 0, 1445d24d4204SJose E. Roman 0, 1446d24d4204SJose E. Roman /*94*/ 0, 1447d24d4204SJose E. Roman 0, 1448d24d4204SJose E. Roman 0, 1449d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1450d24d4204SJose E. Roman 0, 1451d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1452d24d4204SJose E. Roman 0, 1453d24d4204SJose E. Roman 0, 1454d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1455d24d4204SJose E. Roman 0, 1456d24d4204SJose E. Roman /*104*/ 0, 1457d24d4204SJose E. Roman 0, 1458d24d4204SJose E. Roman 0, 1459d24d4204SJose E. Roman 0, 1460d24d4204SJose E. Roman 0, 1461d24d4204SJose E. Roman /*109*/ MatMatSolve_ScaLAPACK, 1462d24d4204SJose E. Roman 0, 1463d24d4204SJose E. Roman 0, 1464d24d4204SJose E. Roman 0, 1465d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1466d24d4204SJose E. Roman /*114*/ 0, 1467d24d4204SJose E. Roman 0, 1468d24d4204SJose E. Roman 0, 1469d24d4204SJose E. Roman 0, 1470d24d4204SJose E. Roman 0, 1471d24d4204SJose E. Roman /*119*/ 0, 1472d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1473*5e4d33a3SBlanca Mellado Pinto MatMultHermitianTranspose_ScaLAPACK, 1474*5e4d33a3SBlanca Mellado Pinto MatMultHermitianTransposeAdd_ScaLAPACK, 1475d24d4204SJose E. Roman 0, 1476d24d4204SJose E. Roman /*124*/ 0, 1477d24d4204SJose E. Roman 0, 1478d24d4204SJose E. Roman 0, 1479d24d4204SJose E. Roman 0, 1480d24d4204SJose E. Roman 0, 1481d24d4204SJose E. Roman /*129*/ 0, 1482d24d4204SJose E. Roman 0, 1483d24d4204SJose E. Roman 0, 1484d24d4204SJose E. Roman 0, 1485d24d4204SJose E. Roman 0, 1486d24d4204SJose E. Roman /*134*/ 0, 1487d24d4204SJose E. Roman 0, 1488d24d4204SJose E. Roman 0, 1489d24d4204SJose E. Roman 0, 1490d24d4204SJose E. Roman 0, 1491d24d4204SJose E. Roman 0, 1492d24d4204SJose E. Roman /*140*/ 0, 1493d24d4204SJose E. Roman 0, 1494d24d4204SJose E. Roman 0, 1495d24d4204SJose E. Roman 0, 1496d24d4204SJose E. Roman 0, 1497d24d4204SJose E. Roman /*145*/ 0, 1498d24d4204SJose E. Roman 0, 149999a7f59eSMark Adams 0, 150099a7f59eSMark Adams 0, 15017fb60732SBarry Smith 0, 1502ed6168d8SPierre Jolivet /*150*/ 0, 1503ed6168d8SPierre Jolivet 0}; 1504d24d4204SJose E. Roman 1505d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1506d71ae5a4SJacob Faibussowitsch { 1507d24d4204SJose E. Roman PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1508d24d4204SJose E. Roman PetscInt size = stash->size, nsends; 1509d24d4204SJose E. Roman PetscInt count, *sindices, **rindices, i, j, l; 1510d24d4204SJose E. Roman PetscScalar **rvalues, *svalues; 1511d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1512d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1513d24d4204SJose E. Roman PetscMPIInt *sizes, *nlengths, nreceives; 1514d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy; 1515d24d4204SJose E. Roman PetscScalar *sp_val; 1516d24d4204SJose E. Roman PetscMatStashSpace space, space_next; 1517d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1518d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1519d24d4204SJose E. Roman 1520d24d4204SJose E. Roman PetscFunctionBegin; 1521d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1522d24d4204SJose E. Roman InsertMode addv; 15231c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 152408401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1525d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1526d24d4204SJose E. Roman } 1527d24d4204SJose E. Roman 1528d24d4204SJose E. Roman bs2 = stash->bs * stash->bs; 1529d24d4204SJose E. Roman 1530d24d4204SJose E. Roman /* first count number of contributors to each processor */ 15319566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 15329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1533d24d4204SJose E. Roman 1534d24d4204SJose E. Roman i = j = 0; 1535d24d4204SJose E. Roman space = stash->space_head; 1536d24d4204SJose E. Roman while (space) { 1537d24d4204SJose E. Roman space_next = space->next; 1538d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 15399566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 15409566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1541792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1542d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 15439371c9d4SSatish Balay nlengths[j]++; 15449371c9d4SSatish Balay owner[i] = j; 1545d24d4204SJose E. Roman i++; 1546d24d4204SJose E. Roman } 1547d24d4204SJose E. Roman space = space_next; 1548d24d4204SJose E. Roman } 1549d24d4204SJose E. Roman 1550d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 15519566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 1552d24d4204SJose E. Roman for (i = 0, nsends = 0; i < size; i++) { 1553d24d4204SJose E. Roman if (nlengths[i]) { 15549371c9d4SSatish Balay sizes[i] = 1; 15559371c9d4SSatish Balay nsends++; 1556d24d4204SJose E. Roman } 1557d24d4204SJose E. Roman } 1558d24d4204SJose E. Roman 15599371c9d4SSatish Balay { 15609371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 1561d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 15629566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 15639566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1564d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1565d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] *= 2; 15669566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1567d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1568d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 15699566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 15709566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 15719371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 15729371c9d4SSatish Balay } 1573d24d4204SJose E. Roman 1574d24d4204SJose E. Roman /* do sends: 1575d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1576d24d4204SJose E. Roman the ith processor 1577d24d4204SJose E. Roman */ 15789566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 15799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 15809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1581d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 15829371c9d4SSatish Balay startv[0] = 0; 15839371c9d4SSatish Balay starti[0] = 0; 1584d24d4204SJose E. Roman for (i = 1; i < size; i++) { 1585d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1]; 1586d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1587d24d4204SJose E. Roman } 1588d24d4204SJose E. Roman 1589d24d4204SJose E. Roman i = 0; 1590d24d4204SJose E. Roman space = stash->space_head; 1591d24d4204SJose E. Roman while (space) { 1592d24d4204SJose E. Roman space_next = space->next; 1593d24d4204SJose E. Roman sp_idx = space->idx; 1594d24d4204SJose E. Roman sp_idy = space->idy; 1595d24d4204SJose E. Roman sp_val = space->val; 1596d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 1597d24d4204SJose E. Roman j = owner[i]; 1598d24d4204SJose E. Roman if (bs2 == 1) { 1599d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1600d24d4204SJose E. Roman } else { 1601d24d4204SJose E. Roman PetscInt k; 1602d24d4204SJose E. Roman PetscScalar *buf1, *buf2; 1603d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j]; 1604d24d4204SJose E. Roman buf2 = space->val + bs2 * l; 1605d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1606d24d4204SJose E. Roman } 1607d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1608d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1609d24d4204SJose E. Roman startv[j]++; 1610d24d4204SJose E. Roman starti[j]++; 1611d24d4204SJose E. Roman i++; 1612d24d4204SJose E. Roman } 1613d24d4204SJose E. Roman space = space_next; 1614d24d4204SJose E. Roman } 1615d24d4204SJose E. Roman startv[0] = 0; 1616d24d4204SJose E. Roman for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1617d24d4204SJose E. Roman 1618d24d4204SJose E. Roman for (i = 0, count = 0; i < size; i++) { 1619d24d4204SJose E. Roman if (sizes[i]) { 16209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 16219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1622d24d4204SJose E. Roman } 1623d24d4204SJose E. Roman } 1624d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 16259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1626d24d4204SJose E. Roman for (i = 0; i < size; i++) { 162748a46eb9SPierre Jolivet if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1628d24d4204SJose E. Roman } 1629d24d4204SJose E. Roman #endif 16309566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 16319566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 16329566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 16339566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1634d24d4204SJose E. Roman 1635d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 16369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1637d24d4204SJose E. Roman 1638d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) { 1639d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i]; 1640d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i]; 1641d24d4204SJose E. Roman } 1642d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1643d24d4204SJose E. Roman 16449566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 16459566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1646d24d4204SJose E. Roman 1647d24d4204SJose E. Roman stash->svalues = svalues; 1648d24d4204SJose E. Roman stash->sindices = sindices; 1649d24d4204SJose E. Roman stash->rvalues = rvalues; 1650d24d4204SJose E. Roman stash->rindices = rindices; 1651d24d4204SJose E. Roman stash->send_waits = send_waits; 1652d24d4204SJose E. Roman stash->nsends = nsends; 1653d24d4204SJose E. Roman stash->nrecvs = nreceives; 1654d24d4204SJose E. Roman stash->reproduce_count = 0; 16553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1656d24d4204SJose E. Roman } 1657d24d4204SJose E. Roman 1658d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1659d71ae5a4SJacob Faibussowitsch { 1660d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1661d24d4204SJose E. Roman 1662d24d4204SJose E. Roman PetscFunctionBegin; 166328b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1664aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1665aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 16669566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 16679566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 16683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1669d24d4204SJose E. Roman } 1670d24d4204SJose E. Roman 1671d24d4204SJose E. Roman /*@ 16726aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 167311a5261eSBarry Smith the `MATSCALAPACK` matrix 1674d24d4204SJose E. Roman 1675c3339decSBarry Smith Logically Collective 1676d24d4204SJose E. Roman 1677d8d19677SJose E. Roman Input Parameters: 167811a5261eSBarry Smith + A - a `MATSCALAPACK` matrix 1679d24d4204SJose E. Roman . mb - the row block size 1680d24d4204SJose E. Roman - nb - the column block size 1681d24d4204SJose E. Roman 1682d24d4204SJose E. Roman Level: intermediate 1683d24d4204SJose E. Roman 16842ef1f0ffSBarry Smith Note: 16852ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 16862ef1f0ffSBarry Smith 16871cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1688d24d4204SJose E. Roman @*/ 1689d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1690d71ae5a4SJacob Faibussowitsch { 1691d24d4204SJose E. Roman PetscFunctionBegin; 1692d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1693d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2); 1694d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3); 1695cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 16963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1697d24d4204SJose E. Roman } 1698d24d4204SJose E. Roman 1699d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1700d71ae5a4SJacob Faibussowitsch { 1701d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1702d24d4204SJose E. Roman 1703d24d4204SJose E. Roman PetscFunctionBegin; 1704d24d4204SJose E. Roman if (mb) *mb = a->mb; 1705d24d4204SJose E. Roman if (nb) *nb = a->nb; 17063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1707d24d4204SJose E. Roman } 1708d24d4204SJose E. Roman 1709d24d4204SJose E. Roman /*@ 17106aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 171111a5261eSBarry Smith the `MATSCALAPACK` matrix 1712d24d4204SJose E. Roman 17132ef1f0ffSBarry Smith Not Collective 1714d24d4204SJose E. Roman 1715d24d4204SJose E. Roman Input Parameter: 171611a5261eSBarry Smith . A - a `MATSCALAPACK` matrix 1717d24d4204SJose E. Roman 1718d24d4204SJose E. Roman Output Parameters: 1719d24d4204SJose E. Roman + mb - the row block size 1720d24d4204SJose E. Roman - nb - the column block size 1721d24d4204SJose E. Roman 1722d24d4204SJose E. Roman Level: intermediate 1723d24d4204SJose E. Roman 17242ef1f0ffSBarry Smith Note: 17252ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 17262ef1f0ffSBarry Smith 17271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1728d24d4204SJose E. Roman @*/ 1729d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1730d71ae5a4SJacob Faibussowitsch { 1731d24d4204SJose E. Roman PetscFunctionBegin; 1732d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1733cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 17343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1735d24d4204SJose E. Roman } 1736d24d4204SJose E. Roman 1737d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1738d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1739d24d4204SJose E. Roman 1740d24d4204SJose E. Roman /*MC 1741d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1742d24d4204SJose E. Roman 17432ef1f0ffSBarry Smith Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1744d24d4204SJose E. Roman 1745d24d4204SJose E. Roman Options Database Keys: 17462ef1f0ffSBarry Smith + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 17472ef1f0ffSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1748d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1749d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1750d24d4204SJose E. Roman 17512ef1f0ffSBarry Smith Level: intermediate 17522ef1f0ffSBarry Smith 175389bba20eSBarry Smith Note: 175489bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 175589bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 175689bba20eSBarry Smith the given rank. 175789bba20eSBarry Smith 17581cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1759d24d4204SJose E. Roman M*/ 1760d24d4204SJose E. Roman 1761d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1762d71ae5a4SJacob Faibussowitsch { 1763d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1764d24d4204SJose E. Roman PetscBool flg, flg1; 1765d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1766d24d4204SJose E. Roman MPI_Comm icomm; 1767d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol; 1768d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0}; 1769d24d4204SJose E. Roman PetscMPIInt size; 1770d24d4204SJose E. Roman 1771d24d4204SJose E. Roman PetscFunctionBegin; 1772aea10558SJacob Faibussowitsch A->ops[0] = MatOps_Values; 1773d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1774d24d4204SJose E. Roman 17759566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1776d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1777d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1778d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1779d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1780d24d4204SJose E. Roman 17814dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1782d24d4204SJose E. Roman A->data = (void *)a; 1783d24d4204SJose E. Roman 1784d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1785d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 17869566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 17879566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 17889566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1789d24d4204SJose E. Roman } 17909566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 17919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1792d24d4204SJose E. Roman if (!flg) { 17934dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&grid)); 1794d24d4204SJose E. Roman 17959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size)); 1796d24d4204SJose E. Roman grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1797d24d4204SJose E. Roman 1798d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 17999566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1800d24d4204SJose E. Roman if (flg1) { 180108401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1802d24d4204SJose E. Roman grid->nprow = optv1; 1803d24d4204SJose E. Roman } 1804d0609cedSBarry Smith PetscOptionsEnd(); 1805d24d4204SJose E. Roman 1806d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1807d24d4204SJose E. Roman grid->npcol = size / grid->nprow; 18089566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 18099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1810f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1811d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1812d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1813d24d4204SJose E. Roman grid->grid_refct = 1; 1814d24d4204SJose E. Roman grid->nprow = nprow; 1815d24d4204SJose E. Roman grid->npcol = npcol; 1816d24d4204SJose E. Roman grid->myrow = myrow; 1817d24d4204SJose E. Roman grid->mycol = mycol; 1818d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1819f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1820d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1821f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1822d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 18239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1824d24d4204SJose E. Roman 1825d24d4204SJose E. Roman } else grid->grid_refct++; 18269566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1827d24d4204SJose E. Roman a->grid = grid; 1828d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1829d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1830d24d4204SJose E. Roman 1831d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 18329566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1833d24d4204SJose E. Roman if (flg) { 1834d24d4204SJose E. Roman a->mb = array[0]; 1835d24d4204SJose E. Roman a->nb = (k > 1) ? array[1] : a->mb; 1836d24d4204SJose E. Roman } 1837d0609cedSBarry Smith PetscOptionsEnd(); 1838d24d4204SJose E. Roman 1839b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 18409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 18419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 18429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 18439566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 18443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1845d24d4204SJose E. Roman } 1846d24d4204SJose E. Roman 1847d24d4204SJose E. Roman /*@C 1848d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 184911a5261eSBarry Smith (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1850d24d4204SJose E. Roman 1851d24d4204SJose E. Roman Collective 1852d24d4204SJose E. Roman 1853d24d4204SJose E. Roman Input Parameters: 1854d24d4204SJose E. Roman + comm - MPI communicator 185511a5261eSBarry Smith . mb - row block size (or `PETSC_DECIDE` to have it set) 185611a5261eSBarry Smith . nb - column block size (or `PETSC_DECIDE` to have it set) 1857d24d4204SJose E. Roman . M - number of global rows 1858d24d4204SJose E. Roman . N - number of global columns 1859d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1860d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1861d24d4204SJose E. Roman 1862d24d4204SJose E. Roman Output Parameter: 1863d24d4204SJose E. Roman . A - the matrix 1864d24d4204SJose E. Roman 186511a5261eSBarry Smith Options Database Key: 1866d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1867d24d4204SJose E. Roman 18682ef1f0ffSBarry Smith Level: intermediate 18692ef1f0ffSBarry Smith 18702ef1f0ffSBarry Smith Notes: 18712ef1f0ffSBarry Smith If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 18722ef1f0ffSBarry Smith 187311a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1874d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 187511a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1876d24d4204SJose E. Roman 1877d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1878d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1879d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 188011a5261eSBarry Smith used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1881d24d4204SJose E. Roman 18821cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1883d24d4204SJose E. Roman @*/ 1884d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1885d71ae5a4SJacob Faibussowitsch { 1886d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1887d24d4204SJose E. Roman PetscInt m, n; 1888d24d4204SJose E. Roman 1889d24d4204SJose E. Roman PetscFunctionBegin; 18909566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 18919566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK)); 1892aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1893d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1894d24d4204SJose E. Roman m = PETSC_DECIDE; 18959566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1896d24d4204SJose E. Roman n = PETSC_DECIDE; 18979566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 18989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1899d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data; 19009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M)); 19019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N)); 19029566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 19039566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 19049566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 19059566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 19069566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 19073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1908d24d4204SJose E. Roman } 1909