1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 229371c9d4SSatish Balay static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) { 23f7ec113fSDamian Marek PetscFunctionBegin; 249566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 26f7ec113fSDamian Marek PetscFunctionReturn(0); 27f7ec113fSDamian Marek } 28f7ec113fSDamian Marek 299371c9d4SSatish Balay static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) { 30d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 31d24d4204SJose E. Roman PetscBool iascii; 32d24d4204SJose E. Roman PetscViewerFormat format; 33d24d4204SJose E. Roman Mat Adense; 34d24d4204SJose E. Roman 35d24d4204SJose E. Roman PetscFunctionBegin; 369566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 37d24d4204SJose E. Roman if (iascii) { 389566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 39d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 44d24d4204SJose E. Roman PetscFunctionReturn(0); 45d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 46d24d4204SJose E. Roman PetscFunctionReturn(0); 47d24d4204SJose E. Roman } 48d24d4204SJose E. Roman } 49d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 509566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 519566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer)); 529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 53d24d4204SJose E. Roman PetscFunctionReturn(0); 54d24d4204SJose E. Roman } 55d24d4204SJose E. Roman 569371c9d4SSatish Balay static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) { 57d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 58d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2]; 59d24d4204SJose E. Roman 60d24d4204SJose E. Roman PetscFunctionBegin; 61d24d4204SJose E. Roman info->block_size = 1.0; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */ 64d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */ 65d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 66d24d4204SJose E. Roman info->nz_allocated = isend[0]; 67d24d4204SJose E. Roman info->nz_used = isend[1]; 68d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 6957168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 70d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 71d24d4204SJose E. Roman info->nz_used = irecv[1]; 72d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 7357168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 74d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 75d24d4204SJose E. Roman info->nz_used = irecv[1]; 76d24d4204SJose E. Roman } 77d24d4204SJose E. Roman 78d24d4204SJose E. Roman info->nz_unneeded = 0; 79d24d4204SJose E. Roman info->assemblies = A->num_ass; 80d24d4204SJose E. Roman info->mallocs = 0; 81d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 82d24d4204SJose E. Roman info->fill_ratio_given = 0; 83d24d4204SJose E. Roman info->fill_ratio_needed = 0; 84d24d4204SJose E. Roman info->factor_mallocs = 0; 85d24d4204SJose E. Roman PetscFunctionReturn(0); 86d24d4204SJose E. Roman } 87d24d4204SJose E. Roman 889371c9d4SSatish Balay PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) { 89b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 90b12397e7SPierre Jolivet 91d24d4204SJose E. Roman PetscFunctionBegin; 92d24d4204SJose E. Roman switch (op) { 93d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 94d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 95d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 96d24d4204SJose E. Roman case MAT_SYMMETRIC: 97d24d4204SJose E. Roman case MAT_SORTED_FULL: 989371c9d4SSatish Balay case MAT_HERMITIAN: break; 999371c9d4SSatish Balay case MAT_ROW_ORIENTED: a->roworiented = flg; break; 1009371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 101d24d4204SJose E. Roman } 102d24d4204SJose E. Roman PetscFunctionReturn(0); 103d24d4204SJose E. Roman } 104d24d4204SJose E. Roman 1059371c9d4SSatish Balay static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) { 106d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 107d24d4204SJose E. Roman PetscInt i, j; 108d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 109b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 110d24d4204SJose E. Roman 111d24d4204SJose E. Roman PetscFunctionBegin; 112b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 113d24d4204SJose E. Roman for (i = 0; i < nr; i++) { 114d24d4204SJose E. Roman if (rows[i] < 0) continue; 1159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 116d24d4204SJose E. Roman for (j = 0; j < nc; j++) { 117d24d4204SJose E. Roman if (cols[j] < 0) continue; 1189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 119792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 120d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 121b12397e7SPierre Jolivet if (roworiented) { 122d24d4204SJose E. Roman switch (imode) { 123d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break; 124b12397e7SPierre Jolivet default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break; 125b12397e7SPierre Jolivet } 126b12397e7SPierre Jolivet } else { 127b12397e7SPierre Jolivet switch (imode) { 128b12397e7SPierre Jolivet case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break; 129b12397e7SPierre Jolivet default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break; 130b12397e7SPierre Jolivet } 131d24d4204SJose E. Roman } 132d24d4204SJose E. Roman } else { 13328b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 134d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 135b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 136d24d4204SJose E. Roman } 137d24d4204SJose E. Roman } 138d24d4204SJose E. Roman } 139d24d4204SJose E. Roman PetscFunctionReturn(0); 140d24d4204SJose E. Roman } 141d24d4204SJose E. Roman 1429371c9d4SSatish Balay static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) { 143d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 144d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0; 145d24d4204SJose E. Roman const PetscInt *ranges; 146d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 147d24d4204SJose E. Roman 148d24d4204SJose E. Roman PetscFunctionBegin; 149d24d4204SJose E. Roman if (transpose) { 150d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1519566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1529566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 153d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 154792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 155d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1569566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1579566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 158d24d4204SJose E. Roman ylld = 1; 159792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 160d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 161d24d4204SJose E. Roman 162d24d4204SJose E. Roman /* allocate 2d vectors */ 163d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 164d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1659566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 166d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 167d24d4204SJose E. Roman 168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 169792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 170d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 171792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 172d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 173d24d4204SJose E. Roman 174d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 175792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 176d24d4204SJose E. Roman 177d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 178792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 179d24d4204SJose E. Roman 180d24d4204SJose E. Roman /* call PBLAS subroutine */ 181792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 182d24d4204SJose E. Roman 183d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 184792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman } else { /* non-transpose */ 187d24d4204SJose E. Roman 188d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1899566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1909566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 191d24d4204SJose E. Roman xlld = 1; 192792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 193d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1949566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1959566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 196d24d4204SJose E. Roman ylld = PetscMax(1, A->rmap->n); 197792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 198d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 199d24d4204SJose E. Roman 200d24d4204SJose E. Roman /* allocate 2d vectors */ 201d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 202d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 2039566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 204d24d4204SJose E. Roman ylld = PetscMax(1, lszy); 205d24d4204SJose E. Roman 206d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 207792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 208d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 209792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 210d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 211d24d4204SJose E. Roman 212d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 213792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 214d24d4204SJose E. Roman 215d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 216792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 217d24d4204SJose E. Roman 218d24d4204SJose E. Roman /* call PBLAS subroutine */ 219792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 220d24d4204SJose E. Roman 221d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 222792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 223d24d4204SJose E. Roman } 2249566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d)); 225d24d4204SJose E. Roman PetscFunctionReturn(0); 226d24d4204SJose E. Roman } 227d24d4204SJose E. Roman 2289371c9d4SSatish Balay static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) { 229d24d4204SJose E. Roman const PetscScalar *xarray; 230d24d4204SJose E. Roman PetscScalar *yarray; 231d24d4204SJose E. Roman 232d24d4204SJose E. Roman PetscFunctionBegin; 2339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2349566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2359566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray)); 2369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 238d24d4204SJose E. Roman PetscFunctionReturn(0); 239d24d4204SJose E. Roman } 240d24d4204SJose E. Roman 2419371c9d4SSatish Balay static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) { 242d24d4204SJose E. Roman const PetscScalar *xarray; 243d24d4204SJose E. Roman PetscScalar *yarray; 244d24d4204SJose E. Roman 245d24d4204SJose E. Roman PetscFunctionBegin; 2469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2479566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2489566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray)); 2499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 251d24d4204SJose E. Roman PetscFunctionReturn(0); 252d24d4204SJose E. Roman } 253d24d4204SJose E. Roman 2549371c9d4SSatish Balay static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { 255d24d4204SJose E. Roman const PetscScalar *xarray; 256d24d4204SJose E. Roman PetscScalar *zarray; 257d24d4204SJose E. Roman 258d24d4204SJose E. Roman PetscFunctionBegin; 2599566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2609566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2619566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2629566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 2639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 265d24d4204SJose E. Roman PetscFunctionReturn(0); 266d24d4204SJose E. Roman } 267d24d4204SJose E. Roman 2689371c9d4SSatish Balay static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { 269d24d4204SJose E. Roman const PetscScalar *xarray; 270d24d4204SJose E. Roman PetscScalar *zarray; 271d24d4204SJose E. Roman 272d24d4204SJose E. Roman PetscFunctionBegin; 2739566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2759566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2769566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 2779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2789566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 279d24d4204SJose E. Roman PetscFunctionReturn(0); 280d24d4204SJose E. Roman } 281d24d4204SJose E. Roman 2829371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { 283d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 284d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 285d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 286d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 287d24d4204SJose E. Roman PetscBLASInt one = 1; 288d24d4204SJose E. Roman 289d24d4204SJose E. Roman PetscFunctionBegin; 290792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 291d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 292d24d4204SJose E. Roman PetscFunctionReturn(0); 293d24d4204SJose E. Roman } 294d24d4204SJose E. Roman 2959371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { 296d24d4204SJose E. Roman PetscFunctionBegin; 2979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2989566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 2999566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 300d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 301d24d4204SJose E. Roman PetscFunctionReturn(0); 302d24d4204SJose E. Roman } 303d24d4204SJose E. Roman 3049371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { 305d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 306d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 307d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 308d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 309d24d4204SJose E. Roman PetscBLASInt one = 1; 310d24d4204SJose E. Roman 311d24d4204SJose E. Roman PetscFunctionBegin; 312792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 313d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 314d24d4204SJose E. Roman PetscFunctionReturn(0); 315d24d4204SJose E. Roman } 316d24d4204SJose E. Roman 3179371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { 318d24d4204SJose E. Roman PetscFunctionBegin; 3199566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3209566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3219566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 322d24d4204SJose E. Roman PetscFunctionReturn(0); 323d24d4204SJose E. Roman } 324d24d4204SJose E. Roman 325d24d4204SJose E. Roman /* --------------------------------------- */ 3269371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) { 327d24d4204SJose E. Roman PetscFunctionBegin; 328d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 329d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 330d24d4204SJose E. Roman PetscFunctionReturn(0); 331d24d4204SJose E. Roman } 332d24d4204SJose E. Roman 3339371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) { 334d24d4204SJose E. Roman PetscFunctionBegin; 335d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 336d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 337d24d4204SJose E. Roman PetscFunctionReturn(0); 338d24d4204SJose E. Roman } 339d24d4204SJose E. Roman 3409371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) { 341d24d4204SJose E. Roman Mat_Product *product = C->product; 342d24d4204SJose E. Roman 343d24d4204SJose E. Roman PetscFunctionBegin; 344d24d4204SJose E. Roman switch (product->type) { 3459371c9d4SSatish Balay case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break; 3469371c9d4SSatish Balay case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break; 34798921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 348d24d4204SJose E. Roman } 349d24d4204SJose E. Roman PetscFunctionReturn(0); 350d24d4204SJose E. Roman } 351d24d4204SJose E. Roman /* --------------------------------------- */ 352d24d4204SJose E. Roman 3539371c9d4SSatish Balay static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) { 354d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 355d24d4204SJose E. Roman PetscScalar *darray, *d2d, v; 356d24d4204SJose E. Roman const PetscInt *ranges; 357d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 358d24d4204SJose E. Roman 359d24d4204SJose E. Roman PetscFunctionBegin; 3609566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray)); 361d24d4204SJose E. Roman 362d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */ 363d24d4204SJose E. Roman 364d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 3659566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 3669566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 367d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 368792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 369d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 370d24d4204SJose E. Roman 371d24d4204SJose E. Roman /* allocate 2d vector */ 372d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 3739566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 374d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 375d24d4204SJose E. Roman 376d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 377792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 378d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 379d24d4204SJose E. Roman 380d24d4204SJose E. Roman /* collect diagonal */ 381d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) { 382792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 383792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 384d24d4204SJose E. Roman } 385d24d4204SJose E. Roman 386d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 387792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 3889566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 389d24d4204SJose E. Roman 390d24d4204SJose E. Roman } else { /* column version */ 391d24d4204SJose E. Roman 392d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 3939566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 3949566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 395d24d4204SJose E. Roman dlld = 1; 396792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 397d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 398d24d4204SJose E. Roman 399d24d4204SJose E. Roman /* allocate 2d vector */ 400d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4019566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 402d24d4204SJose E. Roman 403d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 404792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 405d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 406d24d4204SJose E. Roman 407d24d4204SJose E. Roman /* collect diagonal */ 408d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) { 409792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 410792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 411d24d4204SJose E. Roman } 412d24d4204SJose E. Roman 413d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 414792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 4159566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 416d24d4204SJose E. Roman } 417d24d4204SJose E. Roman 4189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray)); 4199566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4209566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 421d24d4204SJose E. Roman PetscFunctionReturn(0); 422d24d4204SJose E. Roman } 423d24d4204SJose E. Roman 4249371c9d4SSatish Balay static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) { 425d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 426d24d4204SJose E. Roman const PetscScalar *d; 427d24d4204SJose E. Roman const PetscInt *ranges; 428d24d4204SJose E. Roman PetscScalar *d2d; 429d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 430d24d4204SJose E. Roman 431d24d4204SJose E. Roman PetscFunctionBegin; 432d24d4204SJose E. Roman if (R) { 4339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 434d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4359566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4369566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 437d24d4204SJose E. Roman dlld = 1; 438792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 439d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 440d24d4204SJose E. Roman 441d24d4204SJose E. Roman /* allocate 2d vector */ 442d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4439566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 444d24d4204SJose E. Roman 445d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 446792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 447d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 448d24d4204SJose E. Roman 449d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 450792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 451d24d4204SJose E. Roman 452d24d4204SJose E. Roman /* broadcast along process columns */ 453d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 454d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 455d24d4204SJose E. Roman 456d24d4204SJose E. Roman /* local scaling */ 4579371c9d4SSatish Balay for (j = 0; j < a->locc; j++) 4589371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 459d24d4204SJose E. Roman 4609566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 4619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 462d24d4204SJose E. Roman } 463d24d4204SJose E. Roman if (L) { 4649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 465d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4669566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 4679566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 468d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 469792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 470d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 471d24d4204SJose E. Roman 472d24d4204SJose E. Roman /* allocate 2d vector */ 473d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 4749566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 475d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 476d24d4204SJose E. Roman 477d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 478792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 479d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 480d24d4204SJose E. Roman 481d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 482792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 483d24d4204SJose E. Roman 484d24d4204SJose E. Roman /* broadcast along process rows */ 485d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 486d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 487d24d4204SJose E. Roman 488d24d4204SJose E. Roman /* local scaling */ 4899371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 4909371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 491d24d4204SJose E. Roman 4929566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 4939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 494d24d4204SJose E. Roman } 495d24d4204SJose E. Roman PetscFunctionReturn(0); 496d24d4204SJose E. Roman } 497d24d4204SJose E. Roman 4989371c9d4SSatish Balay static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) { 499d24d4204SJose E. Roman PetscFunctionBegin; 500d24d4204SJose E. Roman *missing = PETSC_FALSE; 501d24d4204SJose E. Roman PetscFunctionReturn(0); 502d24d4204SJose E. Roman } 503d24d4204SJose E. Roman 5049371c9d4SSatish Balay static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) { 505d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 506d24d4204SJose E. Roman PetscBLASInt n, one = 1; 507d24d4204SJose E. Roman 508d24d4204SJose E. Roman PetscFunctionBegin; 509d24d4204SJose E. Roman n = x->lld * x->locc; 510792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 511d24d4204SJose E. Roman PetscFunctionReturn(0); 512d24d4204SJose E. Roman } 513d24d4204SJose E. Roman 5149371c9d4SSatish Balay static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) { 515d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 516d24d4204SJose E. Roman PetscBLASInt i, n; 517d24d4204SJose E. Roman PetscScalar v; 518d24d4204SJose E. Roman 519d24d4204SJose E. Roman PetscFunctionBegin; 520d24d4204SJose E. Roman n = PetscMin(x->M, x->N); 521d24d4204SJose E. Roman for (i = 1; i <= n; i++) { 522792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 523d24d4204SJose E. Roman v += alpha; 524792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 525d24d4204SJose E. Roman } 526d24d4204SJose E. Roman PetscFunctionReturn(0); 527d24d4204SJose E. Roman } 528d24d4204SJose E. Roman 5299371c9d4SSatish Balay static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 530d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 531d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 532d24d4204SJose E. Roman PetscBLASInt one = 1; 533d24d4204SJose E. Roman PetscScalar beta = 1.0; 534d24d4204SJose E. Roman 535d24d4204SJose E. Roman PetscFunctionBegin; 536d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3); 537792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 5389566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 539d24d4204SJose E. Roman PetscFunctionReturn(0); 540d24d4204SJose E. Roman } 541d24d4204SJose E. Roman 5429371c9d4SSatish Balay static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) { 543d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 544d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 545d24d4204SJose E. Roman 546d24d4204SJose E. Roman PetscFunctionBegin; 5479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 5489566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 549d24d4204SJose E. Roman PetscFunctionReturn(0); 550d24d4204SJose E. Roman } 551d24d4204SJose E. Roman 5529371c9d4SSatish Balay static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) { 553d24d4204SJose E. Roman Mat Bs; 554d24d4204SJose E. Roman MPI_Comm comm; 555d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 556d24d4204SJose E. Roman 557d24d4204SJose E. Roman PetscFunctionBegin; 5589566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5599566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs)); 5609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 5619566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK)); 562d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 563d24d4204SJose E. Roman b->M = a->M; 564d24d4204SJose E. Roman b->N = a->N; 565d24d4204SJose E. Roman b->mb = a->mb; 566d24d4204SJose E. Roman b->nb = a->nb; 567d24d4204SJose E. Roman b->rsrc = a->rsrc; 568d24d4204SJose E. Roman b->csrc = a->csrc; 5699566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 570d24d4204SJose E. Roman *B = Bs; 571*48a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 572d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 573d24d4204SJose E. Roman PetscFunctionReturn(0); 574d24d4204SJose E. Roman } 575d24d4204SJose E. Roman 5769371c9d4SSatish Balay static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) { 577d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 578d24d4204SJose E. Roman Mat Bs = *B; 579d24d4204SJose E. Roman PetscBLASInt one = 1; 580d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 581d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 582d24d4204SJose E. Roman PetscInt i, j; 583d24d4204SJose E. Roman #endif 584d24d4204SJose E. Roman 585d24d4204SJose E. Roman PetscFunctionBegin; 5867fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 587d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 5889566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 589d24d4204SJose E. Roman *B = Bs; 590d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 591d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 592792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 593d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 594d24d4204SJose E. Roman /* undo conjugation */ 5959371c9d4SSatish Balay for (i = 0; i < b->locr; i++) 5969371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 597d24d4204SJose E. Roman #endif 598d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 599d24d4204SJose E. Roman PetscFunctionReturn(0); 600d24d4204SJose E. Roman } 601d24d4204SJose E. Roman 6029371c9d4SSatish Balay static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) { 603d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 604d24d4204SJose E. Roman PetscInt i, j; 605d24d4204SJose E. Roman 606d24d4204SJose E. Roman PetscFunctionBegin; 6079371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 6089371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 609d24d4204SJose E. Roman PetscFunctionReturn(0); 610d24d4204SJose E. Roman } 611d24d4204SJose E. Roman 6129371c9d4SSatish Balay static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) { 613d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 614d24d4204SJose E. Roman Mat Bs = *B; 615d24d4204SJose E. Roman PetscBLASInt one = 1; 616d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 617d24d4204SJose E. Roman 618d24d4204SJose E. Roman PetscFunctionBegin; 619d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6209566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 621d24d4204SJose E. Roman *B = Bs; 622d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 623d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 624792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 625d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 626d24d4204SJose E. Roman PetscFunctionReturn(0); 627d24d4204SJose E. Roman } 628d24d4204SJose E. Roman 6299371c9d4SSatish Balay static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) { 630d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 631d24d4204SJose E. Roman PetscScalar *x, *x2d; 632d24d4204SJose E. Roman const PetscInt *ranges; 633d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 634d24d4204SJose E. Roman 635d24d4204SJose E. Roman PetscFunctionBegin; 6369566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X)); 6379566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 638d24d4204SJose E. Roman 639d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 6409566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 6419566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 642d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 643792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 644d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 645d24d4204SJose E. Roman 646d24d4204SJose E. Roman /* allocate 2d vector */ 647d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 6489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d)); 649d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 650d24d4204SJose E. Roman 651d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 652792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 653d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 654d24d4204SJose E. Roman 655d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 656792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 657d24d4204SJose E. Roman 658d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 659d24d4204SJose E. Roman switch (A->factortype) { 660d24d4204SJose E. Roman case MAT_FACTOR_LU: 661792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 662d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 663d24d4204SJose E. Roman break; 664d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 665792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 666d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 667d24d4204SJose E. Roman break; 6689371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 669d24d4204SJose E. Roman } 670d24d4204SJose E. Roman 671d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 672792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 673d24d4204SJose E. Roman 6749566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 6759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 676d24d4204SJose E. Roman PetscFunctionReturn(0); 677d24d4204SJose E. Roman } 678d24d4204SJose E. Roman 6799371c9d4SSatish Balay static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) { 680d24d4204SJose E. Roman PetscFunctionBegin; 6819566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X)); 6829566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y)); 683d24d4204SJose E. Roman PetscFunctionReturn(0); 684d24d4204SJose E. Roman } 685d24d4204SJose E. Roman 6869371c9d4SSatish Balay static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) { 687d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 688d24d4204SJose E. Roman PetscBool flg1, flg2; 689d24d4204SJose E. Roman PetscBLASInt one = 1, info; 690d24d4204SJose E. Roman 691d24d4204SJose E. Roman PetscFunctionBegin; 6929566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 6939566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 69408401ef6SPierre Jolivet PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 695d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B, 1, X, 2); 696d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)B->data; 697d24d4204SJose E. Roman x = (Mat_ScaLAPACK *)X->data; 6989566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 699d24d4204SJose E. Roman 700d24d4204SJose E. Roman switch (A->factortype) { 701d24d4204SJose E. Roman case MAT_FACTOR_LU: 702792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 703d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 704d24d4204SJose E. Roman break; 705d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 706792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 707d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 708d24d4204SJose E. Roman break; 7099371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 710d24d4204SJose E. Roman } 711d24d4204SJose E. Roman PetscFunctionReturn(0); 712d24d4204SJose E. Roman } 713d24d4204SJose E. Roman 7149371c9d4SSatish Balay static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) { 715d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 716d24d4204SJose E. Roman PetscBLASInt one = 1, info; 717d24d4204SJose E. Roman 718d24d4204SJose E. Roman PetscFunctionBegin; 719d24d4204SJose E. Roman if (!a->pivots) { 7209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 7219566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, a->locr * sizeof(PetscBLASInt))); 722d24d4204SJose E. Roman } 723792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 724d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info); 725d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 726d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 727d24d4204SJose E. Roman 7289566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7299566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 730d24d4204SJose E. Roman PetscFunctionReturn(0); 731d24d4204SJose E. Roman } 732d24d4204SJose E. Roman 7339371c9d4SSatish Balay static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) { 734d24d4204SJose E. Roman PetscFunctionBegin; 7359566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 7369566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 737d24d4204SJose E. Roman PetscFunctionReturn(0); 738d24d4204SJose E. Roman } 739d24d4204SJose E. Roman 7409371c9d4SSatish Balay static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) { 741d24d4204SJose E. Roman PetscFunctionBegin; 742d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 743d24d4204SJose E. Roman PetscFunctionReturn(0); 744d24d4204SJose E. Roman } 745d24d4204SJose E. Roman 7469371c9d4SSatish Balay static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) { 747d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 748d24d4204SJose E. Roman PetscBLASInt one = 1, info; 749d24d4204SJose E. Roman 750d24d4204SJose E. Roman PetscFunctionBegin; 751792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 752d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info); 753d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 754d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 755d24d4204SJose E. Roman 7569566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7579566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 758d24d4204SJose E. Roman PetscFunctionReturn(0); 759d24d4204SJose E. Roman } 760d24d4204SJose E. Roman 7619371c9d4SSatish Balay static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) { 762d24d4204SJose E. Roman PetscFunctionBegin; 7639566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 7649566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 765d24d4204SJose E. Roman PetscFunctionReturn(0); 766d24d4204SJose E. Roman } 767d24d4204SJose E. Roman 7689371c9d4SSatish Balay static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) { 769d24d4204SJose E. Roman PetscFunctionBegin; 770d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 771d24d4204SJose E. Roman PetscFunctionReturn(0); 772d24d4204SJose E. Roman } 773d24d4204SJose E. Roman 7749371c9d4SSatish Balay PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) { 775d24d4204SJose E. Roman PetscFunctionBegin; 776d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 777d24d4204SJose E. Roman PetscFunctionReturn(0); 778d24d4204SJose E. Roman } 779d24d4204SJose E. Roman 7809371c9d4SSatish Balay static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) { 781d24d4204SJose E. Roman Mat B; 78259172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 783d24d4204SJose E. Roman 784d24d4204SJose E. Roman PetscFunctionBegin; 785d24d4204SJose E. Roman /* Create the factorization matrix */ 7869566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 78766e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 788d24d4204SJose E. Roman B->factortype = ftype; 7899566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 7909566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 791d24d4204SJose E. Roman 7929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 793d24d4204SJose E. Roman *F = B; 794d24d4204SJose E. Roman PetscFunctionReturn(0); 795d24d4204SJose E. Roman } 796d24d4204SJose E. Roman 7979371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) { 798d24d4204SJose E. Roman PetscFunctionBegin; 7999566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 8009566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 801d24d4204SJose E. Roman PetscFunctionReturn(0); 802d24d4204SJose E. Roman } 803d24d4204SJose E. Roman 8049371c9d4SSatish Balay static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) { 805d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 806d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0; 807d24d4204SJose E. Roman const char *ntype; 808d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy; 809d24d4204SJose E. Roman 810d24d4204SJose E. Roman PetscFunctionBegin; 811d24d4204SJose E. Roman switch (type) { 812d24d4204SJose E. Roman case NORM_1: 813d24d4204SJose E. Roman ntype = "1"; 814d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 815d24d4204SJose E. Roman break; 816d24d4204SJose E. Roman case NORM_FROBENIUS: 817d24d4204SJose E. Roman ntype = "F"; 818d24d4204SJose E. Roman work = &dummy; 819d24d4204SJose E. Roman break; 820d24d4204SJose E. Roman case NORM_INFINITY: 821d24d4204SJose E. Roman ntype = "I"; 822d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 823d24d4204SJose E. Roman break; 8249371c9d4SSatish Balay default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 825d24d4204SJose E. Roman } 8269566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 827d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 8289566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 829d24d4204SJose E. Roman PetscFunctionReturn(0); 830d24d4204SJose E. Roman } 831d24d4204SJose E. Roman 8329371c9d4SSatish Balay static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) { 833d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 834d24d4204SJose E. Roman 835d24d4204SJose E. Roman PetscFunctionBegin; 8369566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 837d24d4204SJose E. Roman PetscFunctionReturn(0); 838d24d4204SJose E. Roman } 839d24d4204SJose E. Roman 8409371c9d4SSatish Balay static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) { 841d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 842d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx; 843d24d4204SJose E. Roman 844d24d4204SJose E. Roman PetscFunctionBegin; 845d24d4204SJose E. Roman if (rows) { 846d24d4204SJose E. Roman n = a->locr; 847d24d4204SJose E. Roman nb = a->mb; 848d24d4204SJose E. Roman isrc = a->rsrc; 849d24d4204SJose E. Roman nproc = a->grid->nprow; 850d24d4204SJose E. Roman iproc = a->grid->myrow; 8519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 852d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 8539566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 854d24d4204SJose E. Roman } 855d24d4204SJose E. Roman if (cols) { 856d24d4204SJose E. Roman n = a->locc; 857d24d4204SJose E. Roman nb = a->nb; 858d24d4204SJose E. Roman isrc = a->csrc; 859d24d4204SJose E. Roman nproc = a->grid->npcol; 860d24d4204SJose E. Roman iproc = a->grid->mycol; 8619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 862d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 8639566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 864d24d4204SJose E. Roman } 865d24d4204SJose E. Roman PetscFunctionReturn(0); 866d24d4204SJose E. Roman } 867d24d4204SJose E. Roman 8689371c9d4SSatish Balay static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) { 869d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 870d24d4204SJose E. Roman Mat Bmpi; 871d24d4204SJose E. Roman MPI_Comm comm; 8724b1a79daSJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz; 8734b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork; 8744b1a79daSJose E. Roman const PetscScalar *vwork; 875d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 876d24d4204SJose E. Roman PetscScalar *barray; 8774b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE; 8784b1a79daSJose E. Roman PetscMPIInt size; 879d24d4204SJose E. Roman 880d24d4204SJose E. Roman PetscFunctionBegin; 8819566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 8829566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 8834b1a79daSJose E. Roman 8844b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 8859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 8869566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 8879371c9d4SSatish Balay for (i = 0; i < size; i++) 8889371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) { 8899371c9d4SSatish Balay differ = PETSC_TRUE; 8909371c9d4SSatish Balay break; 8919371c9d4SSatish Balay } 8924b1a79daSJose E. Roman } 8934b1a79daSJose E. Roman 8944b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 8959566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 8964b1a79daSJose E. Roman m = PETSC_DECIDE; 8979566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 8984b1a79daSJose E. Roman n = PETSC_DECIDE; 8999566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9009566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9019566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 9029566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 9034b1a79daSJose E. Roman 9044b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9059566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 9064b1a79daSJose E. Roman lld = PetscMax(A->rmap->n, 1); /* local leading dimension */ 907792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 9084b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info); 9094b1a79daSJose E. Roman 9104b1a79daSJose E. Roman /* redistribute matrix */ 9119566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 912792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 9139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 9149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 9159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 9164b1a79daSJose E. Roman 9174b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9189566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 9194b1a79daSJose E. Roman for (i = rstart; i < rend; i++) { 9209566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 9219566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 9229566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 9234b1a79daSJose E. Roman } 9249566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 9259566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 9269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 9274b1a79daSJose E. Roman 9284b1a79daSJose E. Roman } else { /* normal cases */ 929d24d4204SJose E. Roman 930d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 931d24d4204SJose E. Roman else { 9329566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 93392c846b4SJose E. Roman m = PETSC_DECIDE; 9349566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 93592c846b4SJose E. Roman n = PETSC_DECIDE; 9369566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9379566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9389566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 9399566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 940d24d4204SJose E. Roman } 941d24d4204SJose E. Roman 942d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9439566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 944d24d4204SJose E. Roman lld = PetscMax(A->rmap->n, 1); /* local leading dimension */ 945792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 946d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 947d24d4204SJose E. Roman 948d24d4204SJose E. Roman /* redistribute matrix */ 9499566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 950792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 9519566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 952d24d4204SJose E. Roman 9539566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 9549566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 955d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 9569566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 957d24d4204SJose E. Roman } else *B = Bmpi; 9584b1a79daSJose E. Roman } 959d24d4204SJose E. Roman PetscFunctionReturn(0); 960d24d4204SJose E. Roman } 961d24d4204SJose E. Roman 9629371c9d4SSatish Balay static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) { 963b12397e7SPierre Jolivet const PetscInt *ranges; 964b12397e7SPierre Jolivet PetscMPIInt size; 965b12397e7SPierre Jolivet PetscInt i, n; 966b12397e7SPierre Jolivet 967b12397e7SPierre Jolivet PetscFunctionBegin; 968b12397e7SPierre Jolivet *correct = PETSC_TRUE; 969b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size)); 970b12397e7SPierre Jolivet if (size > 1) { 971b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges)); 972b12397e7SPierre Jolivet n = ranges[1] - ranges[0]; 9739371c9d4SSatish Balay for (i = 1; i < size; i++) 9749371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break; 975b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 976b12397e7SPierre Jolivet } 977b12397e7SPierre Jolivet PetscFunctionReturn(0); 978b12397e7SPierre Jolivet } 979b12397e7SPierre Jolivet 9809371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) { 981d24d4204SJose E. Roman Mat_ScaLAPACK *b; 982d24d4204SJose E. Roman Mat Bmpi; 983d24d4204SJose E. Roman MPI_Comm comm; 98492c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 985b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols; 986d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 987d24d4204SJose E. Roman PetscScalar *aarray; 988b12397e7SPierre Jolivet IS ir, ic; 9894e8b52a1SJose E. Roman PetscInt lda; 990b12397e7SPierre Jolivet PetscBool flg; 991d24d4204SJose E. Roman 992d24d4204SJose E. Roman PetscFunctionBegin; 9939566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 994d24d4204SJose E. Roman 995d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 996d24d4204SJose E. Roman else { 9979566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 99892c846b4SJose E. Roman m = PETSC_DECIDE; 9999566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 100092c846b4SJose E. Roman n = PETSC_DECIDE; 10019566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10029566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10039566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 10049566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1005d24d4204SJose E. Roman } 1006d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data; 1007d24d4204SJose E. Roman 1008b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 1009b12397e7SPierre Jolivet PetscCall(MatDenseGetArray(A, &aarray)); 1010b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1011b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1012b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1013d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 10149566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 10159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 10164e8b52a1SJose E. Roman lld = PetscMax(lda, 1); /* local leading dimension */ 1017792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1018d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1019d24d4204SJose E. Roman 1020d24d4204SJose E. Roman /* redistribute matrix */ 1021792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1022b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1023b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1024b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1025b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1026b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1027b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows)); 1028b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols)); 1029b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1030b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows)); 1031b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols)); 1032b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1033b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1034b12397e7SPierre Jolivet } 10359566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A, &aarray)); 10369566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10379566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1038d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10399566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1040d24d4204SJose E. Roman } else *B = Bmpi; 1041d24d4204SJose E. Roman PetscFunctionReturn(0); 1042d24d4204SJose E. Roman } 1043d24d4204SJose E. Roman 10449371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 1045d24d4204SJose E. Roman Mat mat_scal; 1046d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1047d24d4204SJose E. Roman const PetscInt *cols; 1048d24d4204SJose E. Roman const PetscScalar *vals; 1049d24d4204SJose E. Roman 1050d24d4204SJose E. Roman PetscFunctionBegin; 1051d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1052d24d4204SJose E. Roman mat_scal = *newmat; 10539566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1054d24d4204SJose E. Roman } else { 10559566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1056d24d4204SJose E. Roman m = PETSC_DECIDE; 10579566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1058d24d4204SJose E. Roman n = PETSC_DECIDE; 10599566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 10609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 10619566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 10629566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1063d24d4204SJose E. Roman } 1064d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 10659566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 10669566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 10679566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1068d24d4204SJose E. Roman } 10699566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 10709566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1071d24d4204SJose E. Roman 10729566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1073d24d4204SJose E. Roman else *newmat = mat_scal; 1074d24d4204SJose E. Roman PetscFunctionReturn(0); 1075d24d4204SJose E. Roman } 1076d24d4204SJose E. Roman 10779371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 1078d24d4204SJose E. Roman Mat mat_scal; 1079d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1080d24d4204SJose E. Roman const PetscInt *cols; 1081d24d4204SJose E. Roman const PetscScalar *vals; 1082d24d4204SJose E. Roman PetscScalar v; 1083d24d4204SJose E. Roman 1084d24d4204SJose E. Roman PetscFunctionBegin; 1085d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1086d24d4204SJose E. Roman mat_scal = *newmat; 10879566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1088d24d4204SJose E. Roman } else { 10899566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1090d24d4204SJose E. Roman m = PETSC_DECIDE; 10919566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1092d24d4204SJose E. Roman n = PETSC_DECIDE; 10939566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 10949566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 10959566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 10969566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1097d24d4204SJose E. Roman } 10989566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1099d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11009566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11019566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1102d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */ 1103d24d4204SJose E. Roman if (cols[j] == row) continue; 1104b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 11059566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1106d24d4204SJose E. Roman } 11079566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1108d24d4204SJose E. Roman } 11099566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 11109566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11119566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1112d24d4204SJose E. Roman 11139566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1114d24d4204SJose E. Roman else *newmat = mat_scal; 1115d24d4204SJose E. Roman PetscFunctionReturn(0); 1116d24d4204SJose E. Roman } 1117d24d4204SJose E. Roman 11189371c9d4SSatish Balay static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) { 1119d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1120d24d4204SJose E. Roman PetscInt sz = 0; 1121d24d4204SJose E. Roman 1122d24d4204SJose E. Roman PetscFunctionBegin; 11239566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1125d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1126d24d4204SJose E. Roman 11279566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11289566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 11299566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc)); 11309566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, sz * sizeof(PetscScalar))); 1131d24d4204SJose E. Roman 1132d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1133d24d4204SJose E. Roman PetscFunctionReturn(0); 1134d24d4204SJose E. Roman } 1135d24d4204SJose E. Roman 11369371c9d4SSatish Balay static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) { 1137d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1138d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1139d24d4204SJose E. Roman PetscBool flg; 1140d24d4204SJose E. Roman MPI_Comm icomm; 1141d24d4204SJose E. Roman 1142d24d4204SJose E. Roman PetscFunctionBegin; 11439566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 11449566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11459566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 11469566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 11479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1148d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1149d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1150d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1151d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 11529566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 11539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1154d24d4204SJose E. Roman } 11559566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 11569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 11579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 11589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 11599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 11609566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1161d24d4204SJose E. Roman PetscFunctionReturn(0); 1162d24d4204SJose E. Roman } 1163d24d4204SJose E. Roman 11649371c9d4SSatish Balay PetscErrorCode MatSetUp_ScaLAPACK(Mat A) { 1165d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1166d24d4204SJose E. Roman PetscBLASInt info = 0; 1167b12397e7SPierre Jolivet PetscBool flg; 1168d24d4204SJose E. Roman 1169d24d4204SJose E. Roman PetscFunctionBegin; 11709566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11719566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1172d24d4204SJose E. Roman 1173b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1174b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1175b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1176b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1177b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1178d24d4204SJose E. Roman 1179d24d4204SJose E. Roman /* compute local sizes */ 11809566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 11819566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1182d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1183d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1184d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr); 1185d24d4204SJose E. Roman 1186d24d4204SJose E. Roman /* allocate local array */ 11879566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1188d24d4204SJose E. Roman 1189d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1190792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1191d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1192d24d4204SJose E. Roman PetscFunctionReturn(0); 1193d24d4204SJose E. Roman } 1194d24d4204SJose E. Roman 11959371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) { 1196d24d4204SJose E. Roman PetscInt nstash, reallocs; 1197d24d4204SJose E. Roman 1198d24d4204SJose E. Roman PetscFunctionBegin; 1199d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 12009566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 12019566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 12029566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 1203d24d4204SJose E. Roman PetscFunctionReturn(0); 1204d24d4204SJose E. Roman } 1205d24d4204SJose E. Roman 12069371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) { 1207d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1208d24d4204SJose E. Roman PetscMPIInt n; 1209d24d4204SJose E. Roman PetscInt i, flg, *row, *col; 1210d24d4204SJose E. Roman PetscScalar *val; 1211d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1212d24d4204SJose E. Roman 1213d24d4204SJose E. Roman PetscFunctionBegin; 1214d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1215d24d4204SJose E. Roman while (1) { 12169566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1217d24d4204SJose E. Roman if (!flg) break; 1218d24d4204SJose E. Roman for (i = 0; i < n; i++) { 12199566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 12209566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1221792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1222aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1223d24d4204SJose E. Roman switch (A->insertmode) { 1224d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break; 1225d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break; 122698921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1227d24d4204SJose E. Roman } 1228d24d4204SJose E. Roman } 1229d24d4204SJose E. Roman } 12309566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 1231d24d4204SJose E. Roman PetscFunctionReturn(0); 1232d24d4204SJose E. Roman } 1233d24d4204SJose E. Roman 12349371c9d4SSatish Balay PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) { 1235d24d4204SJose E. Roman Mat Adense, As; 1236d24d4204SJose E. Roman MPI_Comm comm; 1237d24d4204SJose E. Roman 1238d24d4204SJose E. Roman PetscFunctionBegin; 12399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 12409566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense)); 12419566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE)); 12429566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer)); 12439566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 12449566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 12459566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As)); 1246d24d4204SJose E. Roman PetscFunctionReturn(0); 1247d24d4204SJose E. Roman } 1248d24d4204SJose E. Roman 1249d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 12509371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1251d24d4204SJose E. Roman 0, 1252d24d4204SJose E. Roman 0, 1253d24d4204SJose E. Roman MatMult_ScaLAPACK, 1254d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1255d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1256d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1257d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1258d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1259d24d4204SJose E. Roman 0, 1260d24d4204SJose E. Roman /*10*/ 0, 1261d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1262d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1263d24d4204SJose E. Roman 0, 1264d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1265d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1266d24d4204SJose E. Roman 0, 1267d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1268d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1269d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1270d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1271d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1272d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1273d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1274d24d4204SJose E. Roman /*24*/ 0, 1275d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1276d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1277d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1278d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1279d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1280d24d4204SJose E. Roman 0, 1281d24d4204SJose E. Roman 0, 1282d24d4204SJose E. Roman 0, 1283d24d4204SJose E. Roman 0, 1284d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1285d24d4204SJose E. Roman 0, 1286d24d4204SJose E. Roman 0, 1287d24d4204SJose E. Roman 0, 1288d24d4204SJose E. Roman 0, 1289d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1290d24d4204SJose E. Roman 0, 1291d24d4204SJose E. Roman 0, 1292d24d4204SJose E. Roman 0, 1293d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1294d24d4204SJose E. Roman /*44*/ 0, 1295d24d4204SJose E. Roman MatScale_ScaLAPACK, 1296d24d4204SJose E. Roman MatShift_ScaLAPACK, 1297d24d4204SJose E. Roman 0, 1298d24d4204SJose E. Roman 0, 1299d24d4204SJose E. Roman /*49*/ 0, 1300d24d4204SJose E. Roman 0, 1301d24d4204SJose E. Roman 0, 1302d24d4204SJose E. Roman 0, 1303d24d4204SJose E. Roman 0, 1304d24d4204SJose E. Roman /*54*/ 0, 1305d24d4204SJose E. Roman 0, 1306d24d4204SJose E. Roman 0, 1307d24d4204SJose E. Roman 0, 1308d24d4204SJose E. Roman 0, 1309d24d4204SJose E. Roman /*59*/ 0, 1310d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1311d24d4204SJose E. Roman MatView_ScaLAPACK, 1312d24d4204SJose E. Roman 0, 1313d24d4204SJose E. Roman 0, 1314d24d4204SJose E. Roman /*64*/ 0, 1315d24d4204SJose E. Roman 0, 1316d24d4204SJose E. Roman 0, 1317d24d4204SJose E. Roman 0, 1318d24d4204SJose E. Roman 0, 1319d24d4204SJose E. Roman /*69*/ 0, 1320d24d4204SJose E. Roman 0, 1321d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1322d24d4204SJose E. Roman 0, 1323d24d4204SJose E. Roman 0, 1324d24d4204SJose E. Roman /*74*/ 0, 1325d24d4204SJose E. Roman 0, 1326d24d4204SJose E. Roman 0, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman 0, 1329d24d4204SJose E. Roman /*79*/ 0, 1330d24d4204SJose E. Roman 0, 1331d24d4204SJose E. Roman 0, 1332d24d4204SJose E. Roman 0, 1333d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1334d24d4204SJose E. Roman /*84*/ 0, 1335d24d4204SJose E. Roman 0, 1336d24d4204SJose E. Roman 0, 1337d24d4204SJose E. Roman 0, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman /*89*/ 0, 1340d24d4204SJose E. Roman 0, 1341d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1342d24d4204SJose E. Roman 0, 1343d24d4204SJose E. Roman 0, 1344d24d4204SJose E. Roman /*94*/ 0, 1345d24d4204SJose E. Roman 0, 1346d24d4204SJose E. Roman 0, 1347d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1348d24d4204SJose E. Roman 0, 1349d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1350d24d4204SJose E. Roman 0, 1351d24d4204SJose E. Roman 0, 1352d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman /*104*/ 0, 1355d24d4204SJose E. Roman 0, 1356d24d4204SJose E. Roman 0, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman /*109*/ MatMatSolve_ScaLAPACK, 1360d24d4204SJose E. Roman 0, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1364d24d4204SJose E. Roman /*114*/ 0, 1365d24d4204SJose E. Roman 0, 1366d24d4204SJose E. Roman 0, 1367d24d4204SJose E. Roman 0, 1368d24d4204SJose E. Roman 0, 1369d24d4204SJose E. Roman /*119*/ 0, 1370d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1371d24d4204SJose E. Roman 0, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman 0, 1374d24d4204SJose E. Roman /*124*/ 0, 1375d24d4204SJose E. Roman 0, 1376d24d4204SJose E. Roman 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman 0, 1379d24d4204SJose E. Roman /*129*/ 0, 1380d24d4204SJose E. Roman 0, 1381d24d4204SJose E. Roman 0, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman /*134*/ 0, 1385d24d4204SJose E. Roman 0, 1386d24d4204SJose E. Roman 0, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman 0, 1390d24d4204SJose E. Roman /*140*/ 0, 1391d24d4204SJose E. Roman 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman 0, 1395d24d4204SJose E. Roman /*145*/ 0, 1396d24d4204SJose E. Roman 0, 139799a7f59eSMark Adams 0, 139899a7f59eSMark Adams 0, 13997fb60732SBarry Smith 0, 14009371c9d4SSatish Balay /*150*/ 0}; 1401d24d4204SJose E. Roman 14029371c9d4SSatish Balay static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) { 1403d24d4204SJose E. Roman PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1404d24d4204SJose E. Roman PetscInt size = stash->size, nsends; 1405d24d4204SJose E. Roman PetscInt count, *sindices, **rindices, i, j, l; 1406d24d4204SJose E. Roman PetscScalar **rvalues, *svalues; 1407d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1408d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1409d24d4204SJose E. Roman PetscMPIInt *sizes, *nlengths, nreceives; 1410d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy; 1411d24d4204SJose E. Roman PetscScalar *sp_val; 1412d24d4204SJose E. Roman PetscMatStashSpace space, space_next; 1413d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1414d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1415d24d4204SJose E. Roman 1416d24d4204SJose E. Roman PetscFunctionBegin; 1417d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1418d24d4204SJose E. Roman InsertMode addv; 14191c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 142008401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1421d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1422d24d4204SJose E. Roman } 1423d24d4204SJose E. Roman 1424d24d4204SJose E. Roman bs2 = stash->bs * stash->bs; 1425d24d4204SJose E. Roman 1426d24d4204SJose E. Roman /* first count number of contributors to each processor */ 14279566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 14289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1429d24d4204SJose E. Roman 1430d24d4204SJose E. Roman i = j = 0; 1431d24d4204SJose E. Roman space = stash->space_head; 1432d24d4204SJose E. Roman while (space) { 1433d24d4204SJose E. Roman space_next = space->next; 1434d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 14359566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 14369566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1437792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1438d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 14399371c9d4SSatish Balay nlengths[j]++; 14409371c9d4SSatish Balay owner[i] = j; 1441d24d4204SJose E. Roman i++; 1442d24d4204SJose E. Roman } 1443d24d4204SJose E. Roman space = space_next; 1444d24d4204SJose E. Roman } 1445d24d4204SJose E. Roman 1446d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 14479566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 1448d24d4204SJose E. Roman for (i = 0, nsends = 0; i < size; i++) { 1449d24d4204SJose E. Roman if (nlengths[i]) { 14509371c9d4SSatish Balay sizes[i] = 1; 14519371c9d4SSatish Balay nsends++; 1452d24d4204SJose E. Roman } 1453d24d4204SJose E. Roman } 1454d24d4204SJose E. Roman 14559371c9d4SSatish Balay { 14569371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 1457d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 14589566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 14599566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1460d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1461d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] *= 2; 14629566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1463d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1464d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 14659566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 14669566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 14679371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 14689371c9d4SSatish Balay } 1469d24d4204SJose E. Roman 1470d24d4204SJose E. Roman /* do sends: 1471d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1472d24d4204SJose E. Roman the ith processor 1473d24d4204SJose E. Roman */ 14749566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 14759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 14769566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1477d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 14789371c9d4SSatish Balay startv[0] = 0; 14799371c9d4SSatish Balay starti[0] = 0; 1480d24d4204SJose E. Roman for (i = 1; i < size; i++) { 1481d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1]; 1482d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1483d24d4204SJose E. Roman } 1484d24d4204SJose E. Roman 1485d24d4204SJose E. Roman i = 0; 1486d24d4204SJose E. Roman space = stash->space_head; 1487d24d4204SJose E. Roman while (space) { 1488d24d4204SJose E. Roman space_next = space->next; 1489d24d4204SJose E. Roman sp_idx = space->idx; 1490d24d4204SJose E. Roman sp_idy = space->idy; 1491d24d4204SJose E. Roman sp_val = space->val; 1492d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 1493d24d4204SJose E. Roman j = owner[i]; 1494d24d4204SJose E. Roman if (bs2 == 1) { 1495d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1496d24d4204SJose E. Roman } else { 1497d24d4204SJose E. Roman PetscInt k; 1498d24d4204SJose E. Roman PetscScalar *buf1, *buf2; 1499d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j]; 1500d24d4204SJose E. Roman buf2 = space->val + bs2 * l; 1501d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1502d24d4204SJose E. Roman } 1503d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1504d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1505d24d4204SJose E. Roman startv[j]++; 1506d24d4204SJose E. Roman starti[j]++; 1507d24d4204SJose E. Roman i++; 1508d24d4204SJose E. Roman } 1509d24d4204SJose E. Roman space = space_next; 1510d24d4204SJose E. Roman } 1511d24d4204SJose E. Roman startv[0] = 0; 1512d24d4204SJose E. Roman for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1513d24d4204SJose E. Roman 1514d24d4204SJose E. Roman for (i = 0, count = 0; i < size; i++) { 1515d24d4204SJose E. Roman if (sizes[i]) { 15169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 15179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1518d24d4204SJose E. Roman } 1519d24d4204SJose E. Roman } 1520d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 15219566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1522d24d4204SJose E. Roman for (i = 0; i < size; i++) { 1523*48a46eb9SPierre Jolivet if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1524d24d4204SJose E. Roman } 1525d24d4204SJose E. Roman #endif 15269566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 15279566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 15289566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 15299566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1530d24d4204SJose E. Roman 1531d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 15329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1533d24d4204SJose E. Roman 1534d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) { 1535d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i]; 1536d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i]; 1537d24d4204SJose E. Roman } 1538d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1539d24d4204SJose E. Roman 15409566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 15419566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1542d24d4204SJose E. Roman 1543d24d4204SJose E. Roman stash->svalues = svalues; 1544d24d4204SJose E. Roman stash->sindices = sindices; 1545d24d4204SJose E. Roman stash->rvalues = rvalues; 1546d24d4204SJose E. Roman stash->rindices = rindices; 1547d24d4204SJose E. Roman stash->send_waits = send_waits; 1548d24d4204SJose E. Roman stash->nsends = nsends; 1549d24d4204SJose E. Roman stash->nrecvs = nreceives; 1550d24d4204SJose E. Roman stash->reproduce_count = 0; 1551d24d4204SJose E. Roman PetscFunctionReturn(0); 1552d24d4204SJose E. Roman } 1553d24d4204SJose E. Roman 15549371c9d4SSatish Balay static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) { 1555d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1556d24d4204SJose E. Roman 1557d24d4204SJose E. Roman PetscFunctionBegin; 155828b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1559aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1560aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 15619566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 15629566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1563d24d4204SJose E. Roman PetscFunctionReturn(0); 1564d24d4204SJose E. Roman } 1565d24d4204SJose E. Roman 1566d24d4204SJose E. Roman /*@ 15676aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1568d24d4204SJose E. Roman the ScaLAPACK matrix 1569d24d4204SJose E. Roman 1570d24d4204SJose E. Roman Logically Collective on A 1571d24d4204SJose E. Roman 1572d8d19677SJose E. Roman Input Parameters: 1573d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1574d24d4204SJose E. Roman . mb - the row block size 1575d24d4204SJose E. Roman - nb - the column block size 1576d24d4204SJose E. Roman 1577d24d4204SJose E. Roman Level: intermediate 1578d24d4204SJose E. Roman 1579db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1580d24d4204SJose E. Roman @*/ 15819371c9d4SSatish Balay PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) { 1582d24d4204SJose E. Roman PetscFunctionBegin; 1583d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1584d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2); 1585d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3); 1586cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 1587d24d4204SJose E. Roman PetscFunctionReturn(0); 1588d24d4204SJose E. Roman } 1589d24d4204SJose E. Roman 15909371c9d4SSatish Balay static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) { 1591d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1592d24d4204SJose E. Roman 1593d24d4204SJose E. Roman PetscFunctionBegin; 1594d24d4204SJose E. Roman if (mb) *mb = a->mb; 1595d24d4204SJose E. Roman if (nb) *nb = a->nb; 1596d24d4204SJose E. Roman PetscFunctionReturn(0); 1597d24d4204SJose E. Roman } 1598d24d4204SJose E. Roman 1599d24d4204SJose E. Roman /*@ 16006aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1601d24d4204SJose E. Roman the ScaLAPACK matrix 1602d24d4204SJose E. Roman 1603d24d4204SJose E. Roman Not collective 1604d24d4204SJose E. Roman 1605d24d4204SJose E. Roman Input Parameter: 1606d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1607d24d4204SJose E. Roman 1608d24d4204SJose E. Roman Output Parameters: 1609d24d4204SJose E. Roman + mb - the row block size 1610d24d4204SJose E. Roman - nb - the column block size 1611d24d4204SJose E. Roman 1612d24d4204SJose E. Roman Level: intermediate 1613d24d4204SJose E. Roman 1614db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1615d24d4204SJose E. Roman @*/ 16169371c9d4SSatish Balay PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) { 1617d24d4204SJose E. Roman PetscFunctionBegin; 1618d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1619cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 1620d24d4204SJose E. Roman PetscFunctionReturn(0); 1621d24d4204SJose E. Roman } 1622d24d4204SJose E. Roman 1623d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1624d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1625d24d4204SJose E. Roman 1626d24d4204SJose E. Roman /*MC 1627d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1628d24d4204SJose E. Roman 1629d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1630d24d4204SJose E. Roman 1631d24d4204SJose E. Roman Options Database Keys: 1632d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 163389bba20eSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu 1634d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1635d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1636d24d4204SJose E. Roman 163789bba20eSBarry Smith Note: 163889bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 163989bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 164089bba20eSBarry Smith the given rank. 164189bba20eSBarry Smith 1642d24d4204SJose E. Roman Level: beginner 1643d24d4204SJose E. Roman 164489bba20eSBarry Smith .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()` 1645d24d4204SJose E. Roman M*/ 1646d24d4204SJose E. Roman 16479371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) { 1648d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1649d24d4204SJose E. Roman PetscBool flg, flg1; 1650d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1651d24d4204SJose E. Roman MPI_Comm icomm; 1652d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol; 1653d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0}; 1654d24d4204SJose E. Roman PetscMPIInt size; 1655d24d4204SJose E. Roman 1656d24d4204SJose E. Roman PetscFunctionBegin; 16579566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps))); 1658d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1659d24d4204SJose E. Roman 16609566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1661d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1662d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1663d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1664d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1665d24d4204SJose E. Roman 16669566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A, &a)); 1667d24d4204SJose E. Roman A->data = (void *)a; 1668d24d4204SJose E. Roman 1669d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1670d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 16719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 16729566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 16739566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1674d24d4204SJose E. Roman } 16759566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 16769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1677d24d4204SJose E. Roman if (!flg) { 16789566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A, &grid)); 1679d24d4204SJose E. Roman 16809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size)); 1681d24d4204SJose E. Roman grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1682d24d4204SJose E. Roman 1683d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 16849566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1685d24d4204SJose E. Roman if (flg1) { 168608401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1687d24d4204SJose E. Roman grid->nprow = optv1; 1688d24d4204SJose E. Roman } 1689d0609cedSBarry Smith PetscOptionsEnd(); 1690d24d4204SJose E. Roman 1691d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1692d24d4204SJose E. Roman grid->npcol = size / grid->nprow; 16939566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 16949566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1695f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1696d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1697d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1698d24d4204SJose E. Roman grid->grid_refct = 1; 1699d24d4204SJose E. Roman grid->nprow = nprow; 1700d24d4204SJose E. Roman grid->npcol = npcol; 1701d24d4204SJose E. Roman grid->myrow = myrow; 1702d24d4204SJose E. Roman grid->mycol = mycol; 1703d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1704f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1705d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1706f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1707d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 17089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1709d24d4204SJose E. Roman 1710d24d4204SJose E. Roman } else grid->grid_refct++; 17119566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1712d24d4204SJose E. Roman a->grid = grid; 1713d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1714d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1715d24d4204SJose E. Roman 1716d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 17179566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1718d24d4204SJose E. Roman if (flg) { 1719d24d4204SJose E. Roman a->mb = array[0]; 1720d24d4204SJose E. Roman a->nb = (k > 1) ? array[1] : a->mb; 1721d24d4204SJose E. Roman } 1722d0609cedSBarry Smith PetscOptionsEnd(); 1723d24d4204SJose E. Roman 1724b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 17259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 17269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 17279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 17289566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1729d24d4204SJose E. Roman PetscFunctionReturn(0); 1730d24d4204SJose E. Roman } 1731d24d4204SJose E. Roman 1732d24d4204SJose E. Roman /*@C 1733d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1734d24d4204SJose E. Roman (2D block cyclic distribution). 1735d24d4204SJose E. Roman 1736d24d4204SJose E. Roman Collective 1737d24d4204SJose E. Roman 1738d24d4204SJose E. Roman Input Parameters: 1739d24d4204SJose E. Roman + comm - MPI communicator 1740d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1741d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1742d24d4204SJose E. Roman . M - number of global rows 1743d24d4204SJose E. Roman . N - number of global columns 1744d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1745d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1746d24d4204SJose E. Roman 1747d24d4204SJose E. Roman Output Parameter: 1748d24d4204SJose E. Roman . A - the matrix 1749d24d4204SJose E. Roman 1750d24d4204SJose E. Roman Options Database Keys: 1751d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1752d24d4204SJose E. Roman 1753d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1754d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1755d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1756d24d4204SJose E. Roman 1757d24d4204SJose E. Roman Notes: 1758d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1759d24d4204SJose E. Roman is chosen. 1760d24d4204SJose E. Roman 1761d24d4204SJose E. Roman Storage Information: 1762d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1763d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1764d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1765d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1766d24d4204SJose E. Roman 1767d24d4204SJose E. Roman Level: intermediate 1768d24d4204SJose E. Roman 1769db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1770d24d4204SJose E. Roman @*/ 17719371c9d4SSatish Balay PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) { 1772d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1773d24d4204SJose E. Roman PetscInt m, n; 1774d24d4204SJose E. Roman 1775d24d4204SJose E. Roman PetscFunctionBegin; 17769566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 17779566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK)); 1778aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1779d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1780d24d4204SJose E. Roman m = PETSC_DECIDE; 17819566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1782d24d4204SJose E. Roman n = PETSC_DECIDE; 17839566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 17849566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1785d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data; 17869566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M)); 17879566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N)); 17889566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 17899566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 17909566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 17919566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 17929566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 1793d24d4204SJose E. Roman PetscFunctionReturn(0); 1794d24d4204SJose E. Roman } 1795