1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23d71ae5a4SJacob Faibussowitsch { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27f7ec113fSDamian Marek PetscFunctionReturn(0); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31d71ae5a4SJacob Faibussowitsch { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39d24d4204SJose E. Roman if (iascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 46d24d4204SJose E. Roman PetscFunctionReturn(0); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48d24d4204SJose E. Roman PetscFunctionReturn(0); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 55d24d4204SJose E. Roman PetscFunctionReturn(0); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59d71ae5a4SJacob Faibussowitsch { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 7257168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 7657168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 844dfa11a4SJacob Faibussowitsch info->memory = 0; /* REVIEW ME */ 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 88d24d4204SJose E. Roman PetscFunctionReturn(0); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 91d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92d71ae5a4SJacob Faibussowitsch { 93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94b12397e7SPierre Jolivet 95d24d4204SJose E. Roman PetscFunctionBegin; 96d24d4204SJose E. Roman switch (op) { 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 100d24d4204SJose E. Roman case MAT_SYMMETRIC: 101d24d4204SJose E. Roman case MAT_SORTED_FULL: 102d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 103d71ae5a4SJacob Faibussowitsch break; 104d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 105d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 106d71ae5a4SJacob Faibussowitsch break; 107d71ae5a4SJacob Faibussowitsch default: 108d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109d24d4204SJose E. Roman } 110d24d4204SJose E. Roman PetscFunctionReturn(0); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman 113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114d71ae5a4SJacob Faibussowitsch { 115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116d24d4204SJose E. Roman PetscInt i, j; 117d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 119d24d4204SJose E. Roman 120d24d4204SJose E. Roman PetscFunctionBegin; 121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122d24d4204SJose E. Roman for (i = 0; i < nr; i++) { 123d24d4204SJose E. Roman if (rows[i] < 0) continue; 1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125d24d4204SJose E. Roman for (j = 0; j < nc; j++) { 126d24d4204SJose E. Roman if (cols[j] < 0) continue; 1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130b12397e7SPierre Jolivet if (roworiented) { 131d24d4204SJose E. Roman switch (imode) { 132d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 133d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134d71ae5a4SJacob Faibussowitsch break; 135d71ae5a4SJacob Faibussowitsch default: 136d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137d71ae5a4SJacob Faibussowitsch break; 138b12397e7SPierre Jolivet } 139b12397e7SPierre Jolivet } else { 140b12397e7SPierre Jolivet switch (imode) { 141d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 142d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143d71ae5a4SJacob Faibussowitsch break; 144d71ae5a4SJacob Faibussowitsch default: 145d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146d71ae5a4SJacob Faibussowitsch break; 147b12397e7SPierre Jolivet } 148d24d4204SJose E. Roman } 149d24d4204SJose E. Roman } else { 15028b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 152b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153d24d4204SJose E. Roman } 154d24d4204SJose E. Roman } 155d24d4204SJose E. Roman } 156d24d4204SJose E. Roman PetscFunctionReturn(0); 157d24d4204SJose E. Roman } 158d24d4204SJose E. Roman 159d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160d71ae5a4SJacob Faibussowitsch { 161d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0; 163d24d4204SJose E. Roman const PetscInt *ranges; 164d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman PetscFunctionBegin; 167d24d4204SJose E. Roman if (transpose) { 168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 172792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1749566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176d24d4204SJose E. Roman ylld = 1; 177792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 179d24d4204SJose E. Roman 180d24d4204SJose E. Roman /* allocate 2d vectors */ 181d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 189792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 191d24d4204SJose E. Roman 192d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 193792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194d24d4204SJose E. Roman 195d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 196792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* call PBLAS subroutine */ 199792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200d24d4204SJose E. Roman 201d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 202792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 203d24d4204SJose E. Roman 204d24d4204SJose E. Roman } else { /* non-transpose */ 205d24d4204SJose E. Roman 206d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 2079566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 2089566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 209d24d4204SJose E. Roman xlld = 1; 210792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 211d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 2129566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 2139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 214d24d4204SJose E. Roman ylld = PetscMax(1, A->rmap->n); 215792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 216d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 217d24d4204SJose E. Roman 218d24d4204SJose E. Roman /* allocate 2d vectors */ 219d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 220d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 2219566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 222d24d4204SJose E. Roman ylld = PetscMax(1, lszy); 223d24d4204SJose E. Roman 224d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 225792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 226d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 227792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 228d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 229d24d4204SJose E. Roman 230d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 231792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 232d24d4204SJose E. Roman 233d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 234792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 235d24d4204SJose E. Roman 236d24d4204SJose E. Roman /* call PBLAS subroutine */ 237792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 238d24d4204SJose E. Roman 239d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 240792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 241d24d4204SJose E. Roman } 2429566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d)); 243d24d4204SJose E. Roman PetscFunctionReturn(0); 244d24d4204SJose E. Roman } 245d24d4204SJose E. Roman 246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 247d71ae5a4SJacob Faibussowitsch { 248d24d4204SJose E. Roman const PetscScalar *xarray; 249d24d4204SJose E. Roman PetscScalar *yarray; 250d24d4204SJose E. Roman 251d24d4204SJose E. Roman PetscFunctionBegin; 2529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2539566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2549566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray)); 2559566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 257d24d4204SJose E. Roman PetscFunctionReturn(0); 258d24d4204SJose E. Roman } 259d24d4204SJose E. Roman 260d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 261d71ae5a4SJacob Faibussowitsch { 262d24d4204SJose E. Roman const PetscScalar *xarray; 263d24d4204SJose E. Roman PetscScalar *yarray; 264d24d4204SJose E. Roman 265d24d4204SJose E. Roman PetscFunctionBegin; 2669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2679566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2689566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray)); 2699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 271d24d4204SJose E. Roman PetscFunctionReturn(0); 272d24d4204SJose E. Roman } 273d24d4204SJose E. Roman 274d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 275d71ae5a4SJacob Faibussowitsch { 276d24d4204SJose E. Roman const PetscScalar *xarray; 277d24d4204SJose E. Roman PetscScalar *zarray; 278d24d4204SJose E. Roman 279d24d4204SJose E. Roman PetscFunctionBegin; 2809566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2819566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2829566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2839566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 2849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 286d24d4204SJose E. Roman PetscFunctionReturn(0); 287d24d4204SJose E. Roman } 288d24d4204SJose E. Roman 289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290d71ae5a4SJacob Faibussowitsch { 291d24d4204SJose E. Roman const PetscScalar *xarray; 292d24d4204SJose E. Roman PetscScalar *zarray; 293d24d4204SJose E. Roman 294d24d4204SJose E. Roman PetscFunctionBegin; 2959566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2989566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 2999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 301d24d4204SJose E. Roman PetscFunctionReturn(0); 302d24d4204SJose E. Roman } 303d24d4204SJose E. Roman 304d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 305d71ae5a4SJacob Faibussowitsch { 306d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 307d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 308d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 309d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 310d24d4204SJose E. Roman PetscBLASInt one = 1; 311d24d4204SJose E. Roman 312d24d4204SJose E. Roman PetscFunctionBegin; 313792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 314d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 315d24d4204SJose E. Roman PetscFunctionReturn(0); 316d24d4204SJose E. Roman } 317d24d4204SJose E. Roman 318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 319d71ae5a4SJacob Faibussowitsch { 320d24d4204SJose E. Roman PetscFunctionBegin; 3219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3229566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3239566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 324d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 325d24d4204SJose E. Roman PetscFunctionReturn(0); 326d24d4204SJose E. Roman } 327d24d4204SJose E. Roman 328d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 329d71ae5a4SJacob Faibussowitsch { 330d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 331d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 332d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 333d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 334d24d4204SJose E. Roman PetscBLASInt one = 1; 335d24d4204SJose E. Roman 336d24d4204SJose E. Roman PetscFunctionBegin; 337792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 338d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 339d24d4204SJose E. Roman PetscFunctionReturn(0); 340d24d4204SJose E. Roman } 341d24d4204SJose E. Roman 342d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 343d71ae5a4SJacob Faibussowitsch { 344d24d4204SJose E. Roman PetscFunctionBegin; 3459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3469566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3479566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 348d24d4204SJose E. Roman PetscFunctionReturn(0); 349d24d4204SJose E. Roman } 350d24d4204SJose E. Roman 351d24d4204SJose E. Roman /* --------------------------------------- */ 352d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 353d71ae5a4SJacob Faibussowitsch { 354d24d4204SJose E. Roman PetscFunctionBegin; 355d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 356d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 357d24d4204SJose E. Roman PetscFunctionReturn(0); 358d24d4204SJose E. Roman } 359d24d4204SJose E. Roman 360d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 361d71ae5a4SJacob Faibussowitsch { 362d24d4204SJose E. Roman PetscFunctionBegin; 363d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 364d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 365d24d4204SJose E. Roman PetscFunctionReturn(0); 366d24d4204SJose E. Roman } 367d24d4204SJose E. Roman 368d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 369d71ae5a4SJacob Faibussowitsch { 370d24d4204SJose E. Roman Mat_Product *product = C->product; 371d24d4204SJose E. Roman 372d24d4204SJose E. Roman PetscFunctionBegin; 373d24d4204SJose E. Roman switch (product->type) { 374d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 375d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 376d71ae5a4SJacob Faibussowitsch break; 377d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 378d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 379d71ae5a4SJacob Faibussowitsch break; 380d71ae5a4SJacob Faibussowitsch default: 381d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 382d24d4204SJose E. Roman } 383d24d4204SJose E. Roman PetscFunctionReturn(0); 384d24d4204SJose E. Roman } 385d24d4204SJose E. Roman /* --------------------------------------- */ 386d24d4204SJose E. Roman 387d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 388d71ae5a4SJacob Faibussowitsch { 389d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 390d24d4204SJose E. Roman PetscScalar *darray, *d2d, v; 391d24d4204SJose E. Roman const PetscInt *ranges; 392d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 393d24d4204SJose E. Roman 394d24d4204SJose E. Roman PetscFunctionBegin; 3959566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray)); 396d24d4204SJose E. Roman 397d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */ 398d24d4204SJose E. Roman 399d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4009566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 4019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 402d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 403792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 404d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 405d24d4204SJose E. Roman 406d24d4204SJose E. Roman /* allocate 2d vector */ 407d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 4089566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 409d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 410d24d4204SJose E. Roman 411d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 412792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 413d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 414d24d4204SJose E. Roman 415d24d4204SJose E. Roman /* collect diagonal */ 416d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) { 417792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 418792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 419d24d4204SJose E. Roman } 420d24d4204SJose E. Roman 421d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 422792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 4239566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 424d24d4204SJose E. Roman 425d24d4204SJose E. Roman } else { /* column version */ 426d24d4204SJose E. Roman 427d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4289566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4299566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 430d24d4204SJose E. Roman dlld = 1; 431792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 432d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 433d24d4204SJose E. Roman 434d24d4204SJose E. Roman /* allocate 2d vector */ 435d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4369566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 437d24d4204SJose E. Roman 438d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 439792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 440d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 441d24d4204SJose E. Roman 442d24d4204SJose E. Roman /* collect diagonal */ 443d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) { 444792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 445792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 446d24d4204SJose E. Roman } 447d24d4204SJose E. Roman 448d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 449792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 4509566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 451d24d4204SJose E. Roman } 452d24d4204SJose E. Roman 4539566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray)); 4549566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4559566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 456d24d4204SJose E. Roman PetscFunctionReturn(0); 457d24d4204SJose E. Roman } 458d24d4204SJose E. Roman 459d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 460d71ae5a4SJacob Faibussowitsch { 461d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 462d24d4204SJose E. Roman const PetscScalar *d; 463d24d4204SJose E. Roman const PetscInt *ranges; 464d24d4204SJose E. Roman PetscScalar *d2d; 465d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 466d24d4204SJose E. Roman 467d24d4204SJose E. Roman PetscFunctionBegin; 468d24d4204SJose E. Roman if (R) { 4699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 470d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4719566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 473d24d4204SJose E. Roman dlld = 1; 474792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 475d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 476d24d4204SJose E. Roman 477d24d4204SJose E. Roman /* allocate 2d vector */ 478d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4799566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 480d24d4204SJose E. Roman 481d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 482792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 483d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 484d24d4204SJose E. Roman 485d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 486792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 487d24d4204SJose E. Roman 488d24d4204SJose E. Roman /* broadcast along process columns */ 489d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 490d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 491d24d4204SJose E. Roman 492d24d4204SJose E. Roman /* local scaling */ 4939371c9d4SSatish Balay for (j = 0; j < a->locc; j++) 4949371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 495d24d4204SJose E. Roman 4969566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 4979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 498d24d4204SJose E. Roman } 499d24d4204SJose E. Roman if (L) { 5009566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 501d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5029566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 5039566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 504d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 505792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 506d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 507d24d4204SJose E. Roman 508d24d4204SJose E. Roman /* allocate 2d vector */ 509d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 5109566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 511d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 512d24d4204SJose E. Roman 513d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 514792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 515d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 516d24d4204SJose E. Roman 517d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 518792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 519d24d4204SJose E. Roman 520d24d4204SJose E. Roman /* broadcast along process rows */ 521d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 522d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 523d24d4204SJose E. Roman 524d24d4204SJose E. Roman /* local scaling */ 5259371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 5269371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 527d24d4204SJose E. Roman 5289566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 5299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 530d24d4204SJose E. Roman } 531d24d4204SJose E. Roman PetscFunctionReturn(0); 532d24d4204SJose E. Roman } 533d24d4204SJose E. Roman 534d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 535d71ae5a4SJacob Faibussowitsch { 536d24d4204SJose E. Roman PetscFunctionBegin; 537d24d4204SJose E. Roman *missing = PETSC_FALSE; 538d24d4204SJose E. Roman PetscFunctionReturn(0); 539d24d4204SJose E. Roman } 540d24d4204SJose E. Roman 541d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 542d71ae5a4SJacob Faibussowitsch { 543d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 544d24d4204SJose E. Roman PetscBLASInt n, one = 1; 545d24d4204SJose E. Roman 546d24d4204SJose E. Roman PetscFunctionBegin; 547d24d4204SJose E. Roman n = x->lld * x->locc; 548792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 549d24d4204SJose E. Roman PetscFunctionReturn(0); 550d24d4204SJose E. Roman } 551d24d4204SJose E. Roman 552d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 553d71ae5a4SJacob Faibussowitsch { 554d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 555d24d4204SJose E. Roman PetscBLASInt i, n; 556d24d4204SJose E. Roman PetscScalar v; 557d24d4204SJose E. Roman 558d24d4204SJose E. Roman PetscFunctionBegin; 559d24d4204SJose E. Roman n = PetscMin(x->M, x->N); 560d24d4204SJose E. Roman for (i = 1; i <= n; i++) { 561792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 562d24d4204SJose E. Roman v += alpha; 563792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 564d24d4204SJose E. Roman } 565d24d4204SJose E. Roman PetscFunctionReturn(0); 566d24d4204SJose E. Roman } 567d24d4204SJose E. Roman 568d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 569d71ae5a4SJacob Faibussowitsch { 570d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 571d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 572d24d4204SJose E. Roman PetscBLASInt one = 1; 573d24d4204SJose E. Roman PetscScalar beta = 1.0; 574d24d4204SJose E. Roman 575d24d4204SJose E. Roman PetscFunctionBegin; 576d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3); 577792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 5789566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 579d24d4204SJose E. Roman PetscFunctionReturn(0); 580d24d4204SJose E. Roman } 581d24d4204SJose E. Roman 582d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 583d71ae5a4SJacob Faibussowitsch { 584d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 585d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 586d24d4204SJose E. Roman 587d24d4204SJose E. Roman PetscFunctionBegin; 5889566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 5899566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 590d24d4204SJose E. Roman PetscFunctionReturn(0); 591d24d4204SJose E. Roman } 592d24d4204SJose E. Roman 593d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 594d71ae5a4SJacob Faibussowitsch { 595d24d4204SJose E. Roman Mat Bs; 596d24d4204SJose E. Roman MPI_Comm comm; 597d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 598d24d4204SJose E. Roman 599d24d4204SJose E. Roman PetscFunctionBegin; 6009566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 6019566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs)); 6029566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 6039566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK)); 604d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 605d24d4204SJose E. Roman b->M = a->M; 606d24d4204SJose E. Roman b->N = a->N; 607d24d4204SJose E. Roman b->mb = a->mb; 608d24d4204SJose E. Roman b->nb = a->nb; 609d24d4204SJose E. Roman b->rsrc = a->rsrc; 610d24d4204SJose E. Roman b->csrc = a->csrc; 6119566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 612d24d4204SJose E. Roman *B = Bs; 61348a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 614d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 615d24d4204SJose E. Roman PetscFunctionReturn(0); 616d24d4204SJose E. Roman } 617d24d4204SJose E. Roman 618d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 619d71ae5a4SJacob Faibussowitsch { 620d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 621d24d4204SJose E. Roman Mat Bs = *B; 622d24d4204SJose E. Roman PetscBLASInt one = 1; 623d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 624d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 625d24d4204SJose E. Roman PetscInt i, j; 626d24d4204SJose E. Roman #endif 627d24d4204SJose E. Roman 628d24d4204SJose E. Roman PetscFunctionBegin; 6297fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 630d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6319566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 632d24d4204SJose E. Roman *B = Bs; 633d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 634d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 635792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 636d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 637d24d4204SJose E. Roman /* undo conjugation */ 6389371c9d4SSatish Balay for (i = 0; i < b->locr; i++) 6399371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 640d24d4204SJose E. Roman #endif 641d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 642d24d4204SJose E. Roman PetscFunctionReturn(0); 643d24d4204SJose E. Roman } 644d24d4204SJose E. Roman 645d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 646d71ae5a4SJacob Faibussowitsch { 647d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 648d24d4204SJose E. Roman PetscInt i, j; 649d24d4204SJose E. Roman 650d24d4204SJose E. Roman PetscFunctionBegin; 6519371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 6529371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 653d24d4204SJose E. Roman PetscFunctionReturn(0); 654d24d4204SJose E. Roman } 655d24d4204SJose E. Roman 656d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 657d71ae5a4SJacob Faibussowitsch { 658d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 659d24d4204SJose E. Roman Mat Bs = *B; 660d24d4204SJose E. Roman PetscBLASInt one = 1; 661d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 662d24d4204SJose E. Roman 663d24d4204SJose E. Roman PetscFunctionBegin; 664d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6659566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 666d24d4204SJose E. Roman *B = Bs; 667d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 668d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 669792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 670d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 671d24d4204SJose E. Roman PetscFunctionReturn(0); 672d24d4204SJose E. Roman } 673d24d4204SJose E. Roman 674d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 675d71ae5a4SJacob Faibussowitsch { 676d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 677d24d4204SJose E. Roman PetscScalar *x, *x2d; 678d24d4204SJose E. Roman const PetscInt *ranges; 679d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 680d24d4204SJose E. Roman 681d24d4204SJose E. Roman PetscFunctionBegin; 6829566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X)); 6839566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 684d24d4204SJose E. Roman 685d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 6869566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 6879566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 688d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 689792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 690d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 691d24d4204SJose E. Roman 692d24d4204SJose E. Roman /* allocate 2d vector */ 693d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 6949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d)); 695d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 696d24d4204SJose E. Roman 697d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 698792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 699d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 700d24d4204SJose E. Roman 701d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 702792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 703d24d4204SJose E. Roman 704d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 705d24d4204SJose E. Roman switch (A->factortype) { 706d24d4204SJose E. Roman case MAT_FACTOR_LU: 707792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 708d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 709d24d4204SJose E. Roman break; 710d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 711792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 712d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 713d24d4204SJose E. Roman break; 714d71ae5a4SJacob Faibussowitsch default: 715d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 716d24d4204SJose E. Roman } 717d24d4204SJose E. Roman 718d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 719792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 720d24d4204SJose E. Roman 7219566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 7229566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 723d24d4204SJose E. Roman PetscFunctionReturn(0); 724d24d4204SJose E. Roman } 725d24d4204SJose E. Roman 726d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 727d71ae5a4SJacob Faibussowitsch { 728d24d4204SJose E. Roman PetscFunctionBegin; 7299566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X)); 7309566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y)); 731d24d4204SJose E. Roman PetscFunctionReturn(0); 732d24d4204SJose E. Roman } 733d24d4204SJose E. Roman 734d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 735d71ae5a4SJacob Faibussowitsch { 736d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 737d24d4204SJose E. Roman PetscBool flg1, flg2; 738d24d4204SJose E. Roman PetscBLASInt one = 1, info; 739d24d4204SJose E. Roman 740d24d4204SJose E. Roman PetscFunctionBegin; 7419566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 7429566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 74308401ef6SPierre Jolivet PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 744d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B, 1, X, 2); 745d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)B->data; 746d24d4204SJose E. Roman x = (Mat_ScaLAPACK *)X->data; 7479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 748d24d4204SJose E. Roman 749d24d4204SJose E. Roman switch (A->factortype) { 750d24d4204SJose E. Roman case MAT_FACTOR_LU: 751792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 752d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 753d24d4204SJose E. Roman break; 754d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 755792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 756d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 757d24d4204SJose E. Roman break; 758d71ae5a4SJacob Faibussowitsch default: 759d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 760d24d4204SJose E. Roman } 761d24d4204SJose E. Roman PetscFunctionReturn(0); 762d24d4204SJose E. Roman } 763d24d4204SJose E. Roman 764d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 765d71ae5a4SJacob Faibussowitsch { 766d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 767d24d4204SJose E. Roman PetscBLASInt one = 1, info; 768d24d4204SJose E. Roman 769d24d4204SJose E. Roman PetscFunctionBegin; 7704dfa11a4SJacob Faibussowitsch if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); } 771792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 772d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info); 773d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 774d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 775d24d4204SJose E. Roman 7769566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7779566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 778d24d4204SJose E. Roman PetscFunctionReturn(0); 779d24d4204SJose E. Roman } 780d24d4204SJose E. Roman 781d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 782d71ae5a4SJacob Faibussowitsch { 783d24d4204SJose E. Roman PetscFunctionBegin; 7849566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 7859566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 786d24d4204SJose E. Roman PetscFunctionReturn(0); 787d24d4204SJose E. Roman } 788d24d4204SJose E. Roman 789d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 790d71ae5a4SJacob Faibussowitsch { 791d24d4204SJose E. Roman PetscFunctionBegin; 792d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 793d24d4204SJose E. Roman PetscFunctionReturn(0); 794d24d4204SJose E. Roman } 795d24d4204SJose E. Roman 796d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 797d71ae5a4SJacob Faibussowitsch { 798d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 799d24d4204SJose E. Roman PetscBLASInt one = 1, info; 800d24d4204SJose E. Roman 801d24d4204SJose E. Roman PetscFunctionBegin; 802792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 803d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info); 804d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 805d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 806d24d4204SJose E. Roman 8079566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8089566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 809d24d4204SJose E. Roman PetscFunctionReturn(0); 810d24d4204SJose E. Roman } 811d24d4204SJose E. Roman 812d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 813d71ae5a4SJacob Faibussowitsch { 814d24d4204SJose E. Roman PetscFunctionBegin; 8159566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8169566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 817d24d4204SJose E. Roman PetscFunctionReturn(0); 818d24d4204SJose E. Roman } 819d24d4204SJose E. Roman 820d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 821d71ae5a4SJacob Faibussowitsch { 822d24d4204SJose E. Roman PetscFunctionBegin; 823d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 824d24d4204SJose E. Roman PetscFunctionReturn(0); 825d24d4204SJose E. Roman } 826d24d4204SJose E. Roman 827d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 828d71ae5a4SJacob Faibussowitsch { 829d24d4204SJose E. Roman PetscFunctionBegin; 830d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 831d24d4204SJose E. Roman PetscFunctionReturn(0); 832d24d4204SJose E. Roman } 833d24d4204SJose E. Roman 834d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 835d71ae5a4SJacob Faibussowitsch { 836d24d4204SJose E. Roman Mat B; 83759172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 838d24d4204SJose E. Roman 839d24d4204SJose E. Roman PetscFunctionBegin; 840d24d4204SJose E. Roman /* Create the factorization matrix */ 8419566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 84266e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 843d24d4204SJose E. Roman B->factortype = ftype; 8449566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8459566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 846d24d4204SJose E. Roman 8479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 848d24d4204SJose E. Roman *F = B; 849d24d4204SJose E. Roman PetscFunctionReturn(0); 850d24d4204SJose E. Roman } 851d24d4204SJose E. Roman 852d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 853d71ae5a4SJacob Faibussowitsch { 854d24d4204SJose E. Roman PetscFunctionBegin; 8559566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 8569566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 857d24d4204SJose E. Roman PetscFunctionReturn(0); 858d24d4204SJose E. Roman } 859d24d4204SJose E. Roman 860d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 861d71ae5a4SJacob Faibussowitsch { 862d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 863d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0; 864d24d4204SJose E. Roman const char *ntype; 865d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy; 866d24d4204SJose E. Roman 867d24d4204SJose E. Roman PetscFunctionBegin; 868d24d4204SJose E. Roman switch (type) { 869d24d4204SJose E. Roman case NORM_1: 870d24d4204SJose E. Roman ntype = "1"; 871d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 872d24d4204SJose E. Roman break; 873d24d4204SJose E. Roman case NORM_FROBENIUS: 874d24d4204SJose E. Roman ntype = "F"; 875d24d4204SJose E. Roman work = &dummy; 876d24d4204SJose E. Roman break; 877d24d4204SJose E. Roman case NORM_INFINITY: 878d24d4204SJose E. Roman ntype = "I"; 879d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 880d24d4204SJose E. Roman break; 881d71ae5a4SJacob Faibussowitsch default: 882d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 883d24d4204SJose E. Roman } 8849566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 885d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 8869566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 887d24d4204SJose E. Roman PetscFunctionReturn(0); 888d24d4204SJose E. Roman } 889d24d4204SJose E. Roman 890d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 891d71ae5a4SJacob Faibussowitsch { 892d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 893d24d4204SJose E. Roman 894d24d4204SJose E. Roman PetscFunctionBegin; 8959566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 896d24d4204SJose E. Roman PetscFunctionReturn(0); 897d24d4204SJose E. Roman } 898d24d4204SJose E. Roman 899d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 900d71ae5a4SJacob Faibussowitsch { 901d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 902d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx; 903d24d4204SJose E. Roman 904d24d4204SJose E. Roman PetscFunctionBegin; 905d24d4204SJose E. Roman if (rows) { 906d24d4204SJose E. Roman n = a->locr; 907d24d4204SJose E. Roman nb = a->mb; 908d24d4204SJose E. Roman isrc = a->rsrc; 909d24d4204SJose E. Roman nproc = a->grid->nprow; 910d24d4204SJose E. Roman iproc = a->grid->myrow; 9119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 912d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9139566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 914d24d4204SJose E. Roman } 915d24d4204SJose E. Roman if (cols) { 916d24d4204SJose E. Roman n = a->locc; 917d24d4204SJose E. Roman nb = a->nb; 918d24d4204SJose E. Roman isrc = a->csrc; 919d24d4204SJose E. Roman nproc = a->grid->npcol; 920d24d4204SJose E. Roman iproc = a->grid->mycol; 9219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 922d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9239566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 924d24d4204SJose E. Roman } 925d24d4204SJose E. Roman PetscFunctionReturn(0); 926d24d4204SJose E. Roman } 927d24d4204SJose E. Roman 928d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 929d71ae5a4SJacob Faibussowitsch { 930d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 931d24d4204SJose E. Roman Mat Bmpi; 932d24d4204SJose E. Roman MPI_Comm comm; 9334b1a79daSJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz; 9344b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork; 9354b1a79daSJose E. Roman const PetscScalar *vwork; 936d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 937d24d4204SJose E. Roman PetscScalar *barray; 9384b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE; 9394b1a79daSJose E. Roman PetscMPIInt size; 940d24d4204SJose E. Roman 941d24d4204SJose E. Roman PetscFunctionBegin; 9429566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9439566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 9444b1a79daSJose E. Roman 9454b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9479566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 9489371c9d4SSatish Balay for (i = 0; i < size; i++) 9499371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) { 9509371c9d4SSatish Balay differ = PETSC_TRUE; 9519371c9d4SSatish Balay break; 9529371c9d4SSatish Balay } 9534b1a79daSJose E. Roman } 9544b1a79daSJose E. Roman 9554b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9569566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 9574b1a79daSJose E. Roman m = PETSC_DECIDE; 9589566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 9594b1a79daSJose E. Roman n = PETSC_DECIDE; 9609566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9629566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 9639566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 9644b1a79daSJose E. Roman 9654b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9669566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 9674b1a79daSJose E. Roman lld = PetscMax(A->rmap->n, 1); /* local leading dimension */ 968792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 9694b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info); 9704b1a79daSJose E. Roman 9714b1a79daSJose E. Roman /* redistribute matrix */ 9729566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 973792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 9749566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 9759566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 9769566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 9774b1a79daSJose E. Roman 9784b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9799566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 9804b1a79daSJose E. Roman for (i = rstart; i < rend; i++) { 9819566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 9829566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 9839566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 9844b1a79daSJose E. Roman } 9859566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 9869566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 9879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 9884b1a79daSJose E. Roman 9894b1a79daSJose E. Roman } else { /* normal cases */ 990d24d4204SJose E. Roman 991d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 992d24d4204SJose E. Roman else { 9939566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 99492c846b4SJose E. Roman m = PETSC_DECIDE; 9959566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 99692c846b4SJose E. Roman n = PETSC_DECIDE; 9979566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9999566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 10009566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1001d24d4204SJose E. Roman } 1002d24d4204SJose E. Roman 1003d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10049566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1005d24d4204SJose E. Roman lld = PetscMax(A->rmap->n, 1); /* local leading dimension */ 1006792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1007d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1008d24d4204SJose E. Roman 1009d24d4204SJose E. Roman /* redistribute matrix */ 10109566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1011792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10129566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1013d24d4204SJose E. Roman 10149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1016d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10179566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1018d24d4204SJose E. Roman } else *B = Bmpi; 10194b1a79daSJose E. Roman } 1020d24d4204SJose E. Roman PetscFunctionReturn(0); 1021d24d4204SJose E. Roman } 1022d24d4204SJose E. Roman 1023d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1024d71ae5a4SJacob Faibussowitsch { 1025b12397e7SPierre Jolivet const PetscInt *ranges; 1026b12397e7SPierre Jolivet PetscMPIInt size; 1027b12397e7SPierre Jolivet PetscInt i, n; 1028b12397e7SPierre Jolivet 1029b12397e7SPierre Jolivet PetscFunctionBegin; 1030b12397e7SPierre Jolivet *correct = PETSC_TRUE; 1031b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1032b12397e7SPierre Jolivet if (size > 1) { 1033b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges)); 1034b12397e7SPierre Jolivet n = ranges[1] - ranges[0]; 10359371c9d4SSatish Balay for (i = 1; i < size; i++) 10369371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break; 1037b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1038b12397e7SPierre Jolivet } 1039b12397e7SPierre Jolivet PetscFunctionReturn(0); 1040b12397e7SPierre Jolivet } 1041b12397e7SPierre Jolivet 1042d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1043d71ae5a4SJacob Faibussowitsch { 1044d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1045d24d4204SJose E. Roman Mat Bmpi; 1046d24d4204SJose E. Roman MPI_Comm comm; 104792c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1048b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols; 1049d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1050d24d4204SJose E. Roman PetscScalar *aarray; 1051b12397e7SPierre Jolivet IS ir, ic; 10524e8b52a1SJose E. Roman PetscInt lda; 1053b12397e7SPierre Jolivet PetscBool flg; 1054d24d4204SJose E. Roman 1055d24d4204SJose E. Roman PetscFunctionBegin; 10569566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1057d24d4204SJose E. Roman 1058d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1059d24d4204SJose E. Roman else { 10609566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 106192c846b4SJose E. Roman m = PETSC_DECIDE; 10629566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 106392c846b4SJose E. Roman n = PETSC_DECIDE; 10649566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10659566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10669566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 10679566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1068d24d4204SJose E. Roman } 1069d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data; 1070d24d4204SJose E. Roman 1071b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 1072b12397e7SPierre Jolivet PetscCall(MatDenseGetArray(A, &aarray)); 1073b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1074b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1075b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1076d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 10779566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 10789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 10794e8b52a1SJose E. Roman lld = PetscMax(lda, 1); /* local leading dimension */ 1080792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1081d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1082d24d4204SJose E. Roman 1083d24d4204SJose E. Roman /* redistribute matrix */ 1084792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1085b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1086b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1087b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1088b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1089b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1090b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows)); 1091b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols)); 1092b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1093b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows)); 1094b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols)); 1095b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1096b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1097b12397e7SPierre Jolivet } 10989566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A, &aarray)); 10999566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 11009566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1101d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 11029566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1103d24d4204SJose E. Roman } else *B = Bmpi; 1104d24d4204SJose E. Roman PetscFunctionReturn(0); 1105d24d4204SJose E. Roman } 1106d24d4204SJose E. Roman 1107d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1108d71ae5a4SJacob Faibussowitsch { 1109d24d4204SJose E. Roman Mat mat_scal; 1110d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1111d24d4204SJose E. Roman const PetscInt *cols; 1112d24d4204SJose E. Roman const PetscScalar *vals; 1113d24d4204SJose E. Roman 1114d24d4204SJose E. Roman PetscFunctionBegin; 1115d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1116d24d4204SJose E. Roman mat_scal = *newmat; 11179566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1118d24d4204SJose E. Roman } else { 11199566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1120d24d4204SJose E. Roman m = PETSC_DECIDE; 11219566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1122d24d4204SJose E. Roman n = PETSC_DECIDE; 11239566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11249566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11259566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11269566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1127d24d4204SJose E. Roman } 1128d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11299566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11309566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 11319566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1132d24d4204SJose E. Roman } 11339566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11349566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1135d24d4204SJose E. Roman 11369566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1137d24d4204SJose E. Roman else *newmat = mat_scal; 1138d24d4204SJose E. Roman PetscFunctionReturn(0); 1139d24d4204SJose E. Roman } 1140d24d4204SJose E. Roman 1141d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1142d71ae5a4SJacob Faibussowitsch { 1143d24d4204SJose E. Roman Mat mat_scal; 1144d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1145d24d4204SJose E. Roman const PetscInt *cols; 1146d24d4204SJose E. Roman const PetscScalar *vals; 1147d24d4204SJose E. Roman PetscScalar v; 1148d24d4204SJose E. Roman 1149d24d4204SJose E. Roman PetscFunctionBegin; 1150d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1151d24d4204SJose E. Roman mat_scal = *newmat; 11529566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1153d24d4204SJose E. Roman } else { 11549566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1155d24d4204SJose E. Roman m = PETSC_DECIDE; 11569566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1157d24d4204SJose E. Roman n = PETSC_DECIDE; 11589566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11599566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11609566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11619566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1162d24d4204SJose E. Roman } 11639566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1164d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11659566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11669566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1167d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */ 1168d24d4204SJose E. Roman if (cols[j] == row) continue; 1169b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 11709566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1171d24d4204SJose E. Roman } 11729566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1173d24d4204SJose E. Roman } 11749566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 11759566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11769566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1177d24d4204SJose E. Roman 11789566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1179d24d4204SJose E. Roman else *newmat = mat_scal; 1180d24d4204SJose E. Roman PetscFunctionReturn(0); 1181d24d4204SJose E. Roman } 1182d24d4204SJose E. Roman 1183d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1184d71ae5a4SJacob Faibussowitsch { 1185d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1186d24d4204SJose E. Roman PetscInt sz = 0; 1187d24d4204SJose E. Roman 1188d24d4204SJose E. Roman PetscFunctionBegin; 11899566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11909566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1191d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1192d24d4204SJose E. Roman 11939566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11949566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 11959566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc)); 1196d24d4204SJose E. Roman 1197d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1198d24d4204SJose E. Roman PetscFunctionReturn(0); 1199d24d4204SJose E. Roman } 1200d24d4204SJose E. Roman 1201d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1202d71ae5a4SJacob Faibussowitsch { 1203d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1204d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1205d24d4204SJose E. Roman PetscBool flg; 1206d24d4204SJose E. Roman MPI_Comm icomm; 1207d24d4204SJose E. Roman 1208d24d4204SJose E. Roman PetscFunctionBegin; 12099566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 12109566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12119566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 12129566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 12139566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1214d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1215d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1216d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1217d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 12189566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 12199566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1220d24d4204SJose E. Roman } 12219566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 12229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 12239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 12249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 12259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 12269566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1227d24d4204SJose E. Roman PetscFunctionReturn(0); 1228d24d4204SJose E. Roman } 1229d24d4204SJose E. Roman 1230d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1231d71ae5a4SJacob Faibussowitsch { 1232d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1233d24d4204SJose E. Roman PetscBLASInt info = 0; 1234b12397e7SPierre Jolivet PetscBool flg; 1235d24d4204SJose E. Roman 1236d24d4204SJose E. Roman PetscFunctionBegin; 12379566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12389566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1239d24d4204SJose E. Roman 1240b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1241b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1242b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1243b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1244b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1245d24d4204SJose E. Roman 1246d24d4204SJose E. Roman /* compute local sizes */ 12479566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 12489566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1249d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1250d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1251d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr); 1252d24d4204SJose E. Roman 1253d24d4204SJose E. Roman /* allocate local array */ 12549566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1255d24d4204SJose E. Roman 1256d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1257792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1258d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1259d24d4204SJose E. Roman PetscFunctionReturn(0); 1260d24d4204SJose E. Roman } 1261d24d4204SJose E. Roman 1262d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1263d71ae5a4SJacob Faibussowitsch { 1264d24d4204SJose E. Roman PetscInt nstash, reallocs; 1265d24d4204SJose E. Roman 1266d24d4204SJose E. Roman PetscFunctionBegin; 1267d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 12689566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 12699566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 12709566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 1271d24d4204SJose E. Roman PetscFunctionReturn(0); 1272d24d4204SJose E. Roman } 1273d24d4204SJose E. Roman 1274d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1275d71ae5a4SJacob Faibussowitsch { 1276d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1277d24d4204SJose E. Roman PetscMPIInt n; 1278d24d4204SJose E. Roman PetscInt i, flg, *row, *col; 1279d24d4204SJose E. Roman PetscScalar *val; 1280d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1281d24d4204SJose E. Roman 1282d24d4204SJose E. Roman PetscFunctionBegin; 1283d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1284d24d4204SJose E. Roman while (1) { 12859566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1286d24d4204SJose E. Roman if (!flg) break; 1287d24d4204SJose E. Roman for (i = 0; i < n; i++) { 12889566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 12899566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1290792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1291aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1292d24d4204SJose E. Roman switch (A->insertmode) { 1293d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 1294d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1295d71ae5a4SJacob Faibussowitsch break; 1296d71ae5a4SJacob Faibussowitsch case ADD_VALUES: 1297d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1298d71ae5a4SJacob Faibussowitsch break; 1299d71ae5a4SJacob Faibussowitsch default: 1300d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1301d24d4204SJose E. Roman } 1302d24d4204SJose E. Roman } 1303d24d4204SJose E. Roman } 13049566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 1305d24d4204SJose E. Roman PetscFunctionReturn(0); 1306d24d4204SJose E. Roman } 1307d24d4204SJose E. Roman 1308d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1309d71ae5a4SJacob Faibussowitsch { 1310d24d4204SJose E. Roman Mat Adense, As; 1311d24d4204SJose E. Roman MPI_Comm comm; 1312d24d4204SJose E. Roman 1313d24d4204SJose E. Roman PetscFunctionBegin; 13149566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 13159566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense)); 13169566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE)); 13179566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer)); 13189566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 13199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 13209566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As)); 1321d24d4204SJose E. Roman PetscFunctionReturn(0); 1322d24d4204SJose E. Roman } 1323d24d4204SJose E. Roman 1324d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 13259371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1326d24d4204SJose E. Roman 0, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman MatMult_ScaLAPACK, 1329d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1330d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1331d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1332d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1333d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1334d24d4204SJose E. Roman 0, 1335d24d4204SJose E. Roman /*10*/ 0, 1336d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1337d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1340d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1341d24d4204SJose E. Roman 0, 1342d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1343d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1344d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1345d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1346d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1347d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1348d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1349d24d4204SJose E. Roman /*24*/ 0, 1350d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1351d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1352d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1353d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1354d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1355d24d4204SJose E. Roman 0, 1356d24d4204SJose E. Roman 0, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1360d24d4204SJose E. Roman 0, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1365d24d4204SJose E. Roman 0, 1366d24d4204SJose E. Roman 0, 1367d24d4204SJose E. Roman 0, 1368d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1369d24d4204SJose E. Roman /*44*/ 0, 1370d24d4204SJose E. Roman MatScale_ScaLAPACK, 1371d24d4204SJose E. Roman MatShift_ScaLAPACK, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman 0, 1374d24d4204SJose E. Roman /*49*/ 0, 1375d24d4204SJose E. Roman 0, 1376d24d4204SJose E. Roman 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman 0, 1379d24d4204SJose E. Roman /*54*/ 0, 1380d24d4204SJose E. Roman 0, 1381d24d4204SJose E. Roman 0, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman /*59*/ 0, 1385d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1386d24d4204SJose E. Roman MatView_ScaLAPACK, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman /*64*/ 0, 1390d24d4204SJose E. Roman 0, 1391d24d4204SJose E. Roman 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman /*69*/ 0, 1395d24d4204SJose E. Roman 0, 1396d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1397d24d4204SJose E. Roman 0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman /*74*/ 0, 1400d24d4204SJose E. Roman 0, 1401d24d4204SJose E. Roman 0, 1402d24d4204SJose E. Roman 0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman /*79*/ 0, 1405d24d4204SJose E. Roman 0, 1406d24d4204SJose E. Roman 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1409d24d4204SJose E. Roman /*84*/ 0, 1410d24d4204SJose E. Roman 0, 1411d24d4204SJose E. Roman 0, 1412d24d4204SJose E. Roman 0, 1413d24d4204SJose E. Roman 0, 1414d24d4204SJose E. Roman /*89*/ 0, 1415d24d4204SJose E. Roman 0, 1416d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman /*94*/ 0, 1420d24d4204SJose E. Roman 0, 1421d24d4204SJose E. Roman 0, 1422d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1423d24d4204SJose E. Roman 0, 1424d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1425d24d4204SJose E. Roman 0, 1426d24d4204SJose E. Roman 0, 1427d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1428d24d4204SJose E. Roman 0, 1429d24d4204SJose E. Roman /*104*/ 0, 1430d24d4204SJose E. Roman 0, 1431d24d4204SJose E. Roman 0, 1432d24d4204SJose E. Roman 0, 1433d24d4204SJose E. Roman 0, 1434d24d4204SJose E. Roman /*109*/ MatMatSolve_ScaLAPACK, 1435d24d4204SJose E. Roman 0, 1436d24d4204SJose E. Roman 0, 1437d24d4204SJose E. Roman 0, 1438d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1439d24d4204SJose E. Roman /*114*/ 0, 1440d24d4204SJose E. Roman 0, 1441d24d4204SJose E. Roman 0, 1442d24d4204SJose E. Roman 0, 1443d24d4204SJose E. Roman 0, 1444d24d4204SJose E. Roman /*119*/ 0, 1445d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1446d24d4204SJose E. Roman 0, 1447d24d4204SJose E. Roman 0, 1448d24d4204SJose E. Roman 0, 1449d24d4204SJose E. Roman /*124*/ 0, 1450d24d4204SJose E. Roman 0, 1451d24d4204SJose E. Roman 0, 1452d24d4204SJose E. Roman 0, 1453d24d4204SJose E. Roman 0, 1454d24d4204SJose E. Roman /*129*/ 0, 1455d24d4204SJose E. Roman 0, 1456d24d4204SJose E. Roman 0, 1457d24d4204SJose E. Roman 0, 1458d24d4204SJose E. Roman 0, 1459d24d4204SJose E. Roman /*134*/ 0, 1460d24d4204SJose E. Roman 0, 1461d24d4204SJose E. Roman 0, 1462d24d4204SJose E. Roman 0, 1463d24d4204SJose E. Roman 0, 1464d24d4204SJose E. Roman 0, 1465d24d4204SJose E. Roman /*140*/ 0, 1466d24d4204SJose E. Roman 0, 1467d24d4204SJose E. Roman 0, 1468d24d4204SJose E. Roman 0, 1469d24d4204SJose E. Roman 0, 1470d24d4204SJose E. Roman /*145*/ 0, 1471d24d4204SJose E. Roman 0, 147299a7f59eSMark Adams 0, 147399a7f59eSMark Adams 0, 14747fb60732SBarry Smith 0, 14759371c9d4SSatish Balay /*150*/ 0}; 1476d24d4204SJose E. Roman 1477d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1478d71ae5a4SJacob Faibussowitsch { 1479d24d4204SJose E. Roman PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1480d24d4204SJose E. Roman PetscInt size = stash->size, nsends; 1481d24d4204SJose E. Roman PetscInt count, *sindices, **rindices, i, j, l; 1482d24d4204SJose E. Roman PetscScalar **rvalues, *svalues; 1483d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1484d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1485d24d4204SJose E. Roman PetscMPIInt *sizes, *nlengths, nreceives; 1486d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy; 1487d24d4204SJose E. Roman PetscScalar *sp_val; 1488d24d4204SJose E. Roman PetscMatStashSpace space, space_next; 1489d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1490d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1491d24d4204SJose E. Roman 1492d24d4204SJose E. Roman PetscFunctionBegin; 1493d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1494d24d4204SJose E. Roman InsertMode addv; 14951c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 149608401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1497d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1498d24d4204SJose E. Roman } 1499d24d4204SJose E. Roman 1500d24d4204SJose E. Roman bs2 = stash->bs * stash->bs; 1501d24d4204SJose E. Roman 1502d24d4204SJose E. Roman /* first count number of contributors to each processor */ 15039566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 15049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1505d24d4204SJose E. Roman 1506d24d4204SJose E. Roman i = j = 0; 1507d24d4204SJose E. Roman space = stash->space_head; 1508d24d4204SJose E. Roman while (space) { 1509d24d4204SJose E. Roman space_next = space->next; 1510d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 15119566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 15129566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1513792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1514d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 15159371c9d4SSatish Balay nlengths[j]++; 15169371c9d4SSatish Balay owner[i] = j; 1517d24d4204SJose E. Roman i++; 1518d24d4204SJose E. Roman } 1519d24d4204SJose E. Roman space = space_next; 1520d24d4204SJose E. Roman } 1521d24d4204SJose E. Roman 1522d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 15239566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 1524d24d4204SJose E. Roman for (i = 0, nsends = 0; i < size; i++) { 1525d24d4204SJose E. Roman if (nlengths[i]) { 15269371c9d4SSatish Balay sizes[i] = 1; 15279371c9d4SSatish Balay nsends++; 1528d24d4204SJose E. Roman } 1529d24d4204SJose E. Roman } 1530d24d4204SJose E. Roman 15319371c9d4SSatish Balay { 15329371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 1533d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 15349566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 15359566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1536d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1537d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] *= 2; 15389566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1539d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1540d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 15419566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 15429566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 15439371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 15449371c9d4SSatish Balay } 1545d24d4204SJose E. Roman 1546d24d4204SJose E. Roman /* do sends: 1547d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1548d24d4204SJose E. Roman the ith processor 1549d24d4204SJose E. Roman */ 15509566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 15519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 15529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1553d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 15549371c9d4SSatish Balay startv[0] = 0; 15559371c9d4SSatish Balay starti[0] = 0; 1556d24d4204SJose E. Roman for (i = 1; i < size; i++) { 1557d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1]; 1558d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1559d24d4204SJose E. Roman } 1560d24d4204SJose E. Roman 1561d24d4204SJose E. Roman i = 0; 1562d24d4204SJose E. Roman space = stash->space_head; 1563d24d4204SJose E. Roman while (space) { 1564d24d4204SJose E. Roman space_next = space->next; 1565d24d4204SJose E. Roman sp_idx = space->idx; 1566d24d4204SJose E. Roman sp_idy = space->idy; 1567d24d4204SJose E. Roman sp_val = space->val; 1568d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 1569d24d4204SJose E. Roman j = owner[i]; 1570d24d4204SJose E. Roman if (bs2 == 1) { 1571d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1572d24d4204SJose E. Roman } else { 1573d24d4204SJose E. Roman PetscInt k; 1574d24d4204SJose E. Roman PetscScalar *buf1, *buf2; 1575d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j]; 1576d24d4204SJose E. Roman buf2 = space->val + bs2 * l; 1577d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1578d24d4204SJose E. Roman } 1579d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1580d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1581d24d4204SJose E. Roman startv[j]++; 1582d24d4204SJose E. Roman starti[j]++; 1583d24d4204SJose E. Roman i++; 1584d24d4204SJose E. Roman } 1585d24d4204SJose E. Roman space = space_next; 1586d24d4204SJose E. Roman } 1587d24d4204SJose E. Roman startv[0] = 0; 1588d24d4204SJose E. Roman for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1589d24d4204SJose E. Roman 1590d24d4204SJose E. Roman for (i = 0, count = 0; i < size; i++) { 1591d24d4204SJose E. Roman if (sizes[i]) { 15929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 15939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1594d24d4204SJose E. Roman } 1595d24d4204SJose E. Roman } 1596d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 15979566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1598d24d4204SJose E. Roman for (i = 0; i < size; i++) { 159948a46eb9SPierre Jolivet if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1600d24d4204SJose E. Roman } 1601d24d4204SJose E. Roman #endif 16029566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 16039566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 16049566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 16059566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1606d24d4204SJose E. Roman 1607d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 16089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1609d24d4204SJose E. Roman 1610d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) { 1611d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i]; 1612d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i]; 1613d24d4204SJose E. Roman } 1614d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1615d24d4204SJose E. Roman 16169566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 16179566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1618d24d4204SJose E. Roman 1619d24d4204SJose E. Roman stash->svalues = svalues; 1620d24d4204SJose E. Roman stash->sindices = sindices; 1621d24d4204SJose E. Roman stash->rvalues = rvalues; 1622d24d4204SJose E. Roman stash->rindices = rindices; 1623d24d4204SJose E. Roman stash->send_waits = send_waits; 1624d24d4204SJose E. Roman stash->nsends = nsends; 1625d24d4204SJose E. Roman stash->nrecvs = nreceives; 1626d24d4204SJose E. Roman stash->reproduce_count = 0; 1627d24d4204SJose E. Roman PetscFunctionReturn(0); 1628d24d4204SJose E. Roman } 1629d24d4204SJose E. Roman 1630d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1631d71ae5a4SJacob Faibussowitsch { 1632d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1633d24d4204SJose E. Roman 1634d24d4204SJose E. Roman PetscFunctionBegin; 163528b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1636aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1637aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 16389566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 16399566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1640d24d4204SJose E. Roman PetscFunctionReturn(0); 1641d24d4204SJose E. Roman } 1642d24d4204SJose E. Roman 1643d24d4204SJose E. Roman /*@ 16446aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 164511a5261eSBarry Smith the `MATSCALAPACK` matrix 1646d24d4204SJose E. Roman 1647*c3339decSBarry Smith Logically Collective 1648d24d4204SJose E. Roman 1649d8d19677SJose E. Roman Input Parameters: 165011a5261eSBarry Smith + A - a `MATSCALAPACK` matrix 1651d24d4204SJose E. Roman . mb - the row block size 1652d24d4204SJose E. Roman - nb - the column block size 1653d24d4204SJose E. Roman 1654d24d4204SJose E. Roman Level: intermediate 1655d24d4204SJose E. Roman 165611a5261eSBarry Smith .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1657d24d4204SJose E. Roman @*/ 1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1659d71ae5a4SJacob Faibussowitsch { 1660d24d4204SJose E. Roman PetscFunctionBegin; 1661d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1662d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2); 1663d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3); 1664cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 1665d24d4204SJose E. Roman PetscFunctionReturn(0); 1666d24d4204SJose E. Roman } 1667d24d4204SJose E. Roman 1668d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1669d71ae5a4SJacob Faibussowitsch { 1670d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1671d24d4204SJose E. Roman 1672d24d4204SJose E. Roman PetscFunctionBegin; 1673d24d4204SJose E. Roman if (mb) *mb = a->mb; 1674d24d4204SJose E. Roman if (nb) *nb = a->nb; 1675d24d4204SJose E. Roman PetscFunctionReturn(0); 1676d24d4204SJose E. Roman } 1677d24d4204SJose E. Roman 1678d24d4204SJose E. Roman /*@ 16796aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 168011a5261eSBarry Smith the `MATSCALAPACK` matrix 1681d24d4204SJose E. Roman 1682d24d4204SJose E. Roman Not collective 1683d24d4204SJose E. Roman 1684d24d4204SJose E. Roman Input Parameter: 168511a5261eSBarry Smith . A - a `MATSCALAPACK` matrix 1686d24d4204SJose E. Roman 1687d24d4204SJose E. Roman Output Parameters: 1688d24d4204SJose E. Roman + mb - the row block size 1689d24d4204SJose E. Roman - nb - the column block size 1690d24d4204SJose E. Roman 1691d24d4204SJose E. Roman Level: intermediate 1692d24d4204SJose E. Roman 169311a5261eSBarry Smith .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1694d24d4204SJose E. Roman @*/ 1695d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1696d71ae5a4SJacob Faibussowitsch { 1697d24d4204SJose E. Roman PetscFunctionBegin; 1698d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1699cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 1700d24d4204SJose E. Roman PetscFunctionReturn(0); 1701d24d4204SJose E. Roman } 1702d24d4204SJose E. Roman 1703d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1704d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1705d24d4204SJose E. Roman 1706d24d4204SJose E. Roman /*MC 1707d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1708d24d4204SJose E. Roman 1709d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1710d24d4204SJose E. Roman 1711d24d4204SJose E. Roman Options Database Keys: 171211a5261eSBarry Smith + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()` 171389bba20eSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu 1714d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1715d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1716d24d4204SJose E. Roman 171789bba20eSBarry Smith Note: 171889bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 171989bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 172089bba20eSBarry Smith the given rank. 172189bba20eSBarry Smith 1722d24d4204SJose E. Roman Level: beginner 1723d24d4204SJose E. Roman 172411a5261eSBarry Smith .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()` 1725d24d4204SJose E. Roman M*/ 1726d24d4204SJose E. Roman 1727d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1728d71ae5a4SJacob Faibussowitsch { 1729d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1730d24d4204SJose E. Roman PetscBool flg, flg1; 1731d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1732d24d4204SJose E. Roman MPI_Comm icomm; 1733d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol; 1734d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0}; 1735d24d4204SJose E. Roman PetscMPIInt size; 1736d24d4204SJose E. Roman 1737d24d4204SJose E. Roman PetscFunctionBegin; 17389566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps))); 1739d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1740d24d4204SJose E. Roman 17419566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1742d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1743d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1744d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1745d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1746d24d4204SJose E. Roman 17474dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1748d24d4204SJose E. Roman A->data = (void *)a; 1749d24d4204SJose E. Roman 1750d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1751d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 17529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 17539566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 17549566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1755d24d4204SJose E. Roman } 17569566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 17579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1758d24d4204SJose E. Roman if (!flg) { 17594dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&grid)); 1760d24d4204SJose E. Roman 17619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size)); 1762d24d4204SJose E. Roman grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1763d24d4204SJose E. Roman 1764d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 17659566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1766d24d4204SJose E. Roman if (flg1) { 176708401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1768d24d4204SJose E. Roman grid->nprow = optv1; 1769d24d4204SJose E. Roman } 1770d0609cedSBarry Smith PetscOptionsEnd(); 1771d24d4204SJose E. Roman 1772d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1773d24d4204SJose E. Roman grid->npcol = size / grid->nprow; 17749566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 17759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1776f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1777d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1778d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1779d24d4204SJose E. Roman grid->grid_refct = 1; 1780d24d4204SJose E. Roman grid->nprow = nprow; 1781d24d4204SJose E. Roman grid->npcol = npcol; 1782d24d4204SJose E. Roman grid->myrow = myrow; 1783d24d4204SJose E. Roman grid->mycol = mycol; 1784d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1785f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1786d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1787f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1788d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 17899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1790d24d4204SJose E. Roman 1791d24d4204SJose E. Roman } else grid->grid_refct++; 17929566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1793d24d4204SJose E. Roman a->grid = grid; 1794d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1795d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1796d24d4204SJose E. Roman 1797d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 17989566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1799d24d4204SJose E. Roman if (flg) { 1800d24d4204SJose E. Roman a->mb = array[0]; 1801d24d4204SJose E. Roman a->nb = (k > 1) ? array[1] : a->mb; 1802d24d4204SJose E. Roman } 1803d0609cedSBarry Smith PetscOptionsEnd(); 1804d24d4204SJose E. Roman 1805b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 18069566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 18079566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 18089566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 18099566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1810d24d4204SJose E. Roman PetscFunctionReturn(0); 1811d24d4204SJose E. Roman } 1812d24d4204SJose E. Roman 1813d24d4204SJose E. Roman /*@C 1814d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 181511a5261eSBarry Smith (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1816d24d4204SJose E. Roman 1817d24d4204SJose E. Roman Collective 1818d24d4204SJose E. Roman 1819d24d4204SJose E. Roman Input Parameters: 1820d24d4204SJose E. Roman + comm - MPI communicator 182111a5261eSBarry Smith . mb - row block size (or `PETSC_DECIDE` to have it set) 182211a5261eSBarry Smith . nb - column block size (or `PETSC_DECIDE` to have it set) 1823d24d4204SJose E. Roman . M - number of global rows 1824d24d4204SJose E. Roman . N - number of global columns 1825d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1826d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1827d24d4204SJose E. Roman 1828d24d4204SJose E. Roman Output Parameter: 1829d24d4204SJose E. Roman . A - the matrix 1830d24d4204SJose E. Roman 183111a5261eSBarry Smith Options Database Key: 1832d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1833d24d4204SJose E. Roman 183411a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1835d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 183611a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1837d24d4204SJose E. Roman 183811a5261eSBarry Smith Note: 183911a5261eSBarry Smith If `PETSC_DECIDE` is used for the block sizes, then an appropriate value 1840d24d4204SJose E. Roman is chosen. 1841d24d4204SJose E. Roman 1842d24d4204SJose E. Roman Storage Information: 1843d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1844d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1845d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 184611a5261eSBarry Smith used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1847d24d4204SJose E. Roman 1848d24d4204SJose E. Roman Level: intermediate 1849d24d4204SJose E. Roman 185011a5261eSBarry Smith .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1851d24d4204SJose E. Roman @*/ 1852d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1853d71ae5a4SJacob Faibussowitsch { 1854d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1855d24d4204SJose E. Roman PetscInt m, n; 1856d24d4204SJose E. Roman 1857d24d4204SJose E. Roman PetscFunctionBegin; 18589566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 18599566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK)); 1860aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1861d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1862d24d4204SJose E. Roman m = PETSC_DECIDE; 18639566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1864d24d4204SJose E. Roman n = PETSC_DECIDE; 18659566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 18669566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1867d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data; 18689566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M)); 18699566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N)); 18709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 18719566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 18729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 18739566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 18749566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 1875d24d4204SJose E. Roman PetscFunctionReturn(0); 1876d24d4204SJose E. Roman } 1877