1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23d71ae5a4SJacob Faibussowitsch { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31d71ae5a4SJacob Faibussowitsch { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39d24d4204SJose E. Roman if (iascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59d71ae5a4SJacob Faibussowitsch { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 7257168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 7657168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 844dfa11a4SJacob Faibussowitsch info->memory = 0; /* REVIEW ME */ 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92d71ae5a4SJacob Faibussowitsch { 93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94b12397e7SPierre Jolivet 95d24d4204SJose E. Roman PetscFunctionBegin; 96d24d4204SJose E. Roman switch (op) { 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 100d24d4204SJose E. Roman case MAT_SYMMETRIC: 101d24d4204SJose E. Roman case MAT_SORTED_FULL: 102d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 103d71ae5a4SJacob Faibussowitsch break; 104d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 105d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 106d71ae5a4SJacob Faibussowitsch break; 107d71ae5a4SJacob Faibussowitsch default: 108d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109d24d4204SJose E. Roman } 1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman 113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114d71ae5a4SJacob Faibussowitsch { 115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116d24d4204SJose E. Roman PetscInt i, j; 117d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 119d24d4204SJose E. Roman 120d24d4204SJose E. Roman PetscFunctionBegin; 121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122d24d4204SJose E. Roman for (i = 0; i < nr; i++) { 123d24d4204SJose E. Roman if (rows[i] < 0) continue; 1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125d24d4204SJose E. Roman for (j = 0; j < nc; j++) { 126d24d4204SJose E. Roman if (cols[j] < 0) continue; 1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130b12397e7SPierre Jolivet if (roworiented) { 131d24d4204SJose E. Roman switch (imode) { 132d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 133d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134d71ae5a4SJacob Faibussowitsch break; 135d71ae5a4SJacob Faibussowitsch default: 136d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137d71ae5a4SJacob Faibussowitsch break; 138b12397e7SPierre Jolivet } 139b12397e7SPierre Jolivet } else { 140b12397e7SPierre Jolivet switch (imode) { 141d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 142d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143d71ae5a4SJacob Faibussowitsch break; 144d71ae5a4SJacob Faibussowitsch default: 145d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146d71ae5a4SJacob Faibussowitsch break; 147b12397e7SPierre Jolivet } 148d24d4204SJose E. Roman } 149d24d4204SJose E. Roman } else { 15028b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 152b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153d24d4204SJose E. Roman } 154d24d4204SJose E. Roman } 155d24d4204SJose E. Roman } 1563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 157d24d4204SJose E. Roman } 158d24d4204SJose E. Roman 159d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160d71ae5a4SJacob Faibussowitsch { 161d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0; 163d24d4204SJose E. Roman const PetscInt *ranges; 164d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman PetscFunctionBegin; 167d24d4204SJose E. Roman if (transpose) { 168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 172792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1749566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 1759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176d24d4204SJose E. Roman ylld = 1; 177792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 179d24d4204SJose E. Roman 180d24d4204SJose E. Roman /* allocate 2d vectors */ 181d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 189792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 191d24d4204SJose E. Roman 192d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 193*c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194d24d4204SJose E. Roman 195d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 196792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* call PBLAS subroutine */ 199792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200d24d4204SJose E. Roman 201d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 202792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 203d24d4204SJose E. Roman 204d24d4204SJose E. Roman } else { /* non-transpose */ 205d24d4204SJose E. Roman 206d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 2079566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 2089566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 209d24d4204SJose E. Roman xlld = 1; 210792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 211d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 2129566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 2139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 214d24d4204SJose E. Roman ylld = PetscMax(1, A->rmap->n); 215792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 216d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 217d24d4204SJose E. Roman 218d24d4204SJose E. Roman /* allocate 2d vectors */ 219d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 220d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 2219566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 222d24d4204SJose E. Roman ylld = PetscMax(1, lszy); 223d24d4204SJose E. Roman 224d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 225792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 226d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 227792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 228d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 229d24d4204SJose E. Roman 230d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 231*c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 232d24d4204SJose E. Roman 233d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 234792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 235d24d4204SJose E. Roman 236d24d4204SJose E. Roman /* call PBLAS subroutine */ 237792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 238d24d4204SJose E. Roman 239d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 240792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 241d24d4204SJose E. Roman } 2429566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d)); 2433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 244d24d4204SJose E. Roman } 245d24d4204SJose E. Roman 246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 247d71ae5a4SJacob Faibussowitsch { 248d24d4204SJose E. Roman const PetscScalar *xarray; 249d24d4204SJose E. Roman PetscScalar *yarray; 250d24d4204SJose E. Roman 251d24d4204SJose E. Roman PetscFunctionBegin; 2529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2539566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2549566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray)); 2559566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 258d24d4204SJose E. Roman } 259d24d4204SJose E. Roman 260d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 261d71ae5a4SJacob Faibussowitsch { 262d24d4204SJose E. Roman const PetscScalar *xarray; 263d24d4204SJose E. Roman PetscScalar *yarray; 264d24d4204SJose E. Roman 265d24d4204SJose E. Roman PetscFunctionBegin; 2669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2679566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray)); 2689566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray)); 2699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray)); 2713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 272d24d4204SJose E. Roman } 273d24d4204SJose E. Roman 274d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 275d71ae5a4SJacob Faibussowitsch { 276d24d4204SJose E. Roman const PetscScalar *xarray; 277d24d4204SJose E. Roman PetscScalar *zarray; 278d24d4204SJose E. Roman 279d24d4204SJose E. Roman PetscFunctionBegin; 2809566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2819566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2829566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2839566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 2849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 2859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 2863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 287d24d4204SJose E. Roman } 288d24d4204SJose E. Roman 289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290d71ae5a4SJacob Faibussowitsch { 291d24d4204SJose E. Roman const PetscScalar *xarray; 292d24d4204SJose E. Roman PetscScalar *zarray; 293d24d4204SJose E. Roman 294d24d4204SJose E. Roman PetscFunctionBegin; 2959566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z)); 2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray)); 2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray)); 2989566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 2999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray)); 3009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray)); 3013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 302d24d4204SJose E. Roman } 303d24d4204SJose E. Roman 304d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 305d71ae5a4SJacob Faibussowitsch { 306d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 307d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 308d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 309d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 310d24d4204SJose E. Roman PetscBLASInt one = 1; 311d24d4204SJose E. Roman 312d24d4204SJose E. Roman PetscFunctionBegin; 313792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 314d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 316d24d4204SJose E. Roman } 317d24d4204SJose E. Roman 318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 319d71ae5a4SJacob Faibussowitsch { 320d24d4204SJose E. Roman PetscFunctionBegin; 3219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3229566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3239566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 324d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 3253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 326d24d4204SJose E. Roman } 327d24d4204SJose E. Roman 328d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 329d71ae5a4SJacob Faibussowitsch { 330d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 331d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 332d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 333d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 334d24d4204SJose E. Roman PetscBLASInt one = 1; 335d24d4204SJose E. Roman 336d24d4204SJose E. Roman PetscFunctionBegin; 337792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 338d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 3393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 340d24d4204SJose E. Roman } 341d24d4204SJose E. Roman 342d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 343d71ae5a4SJacob Faibussowitsch { 344d24d4204SJose E. Roman PetscFunctionBegin; 3459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3469566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK)); 3479566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 3483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 349d24d4204SJose E. Roman } 350d24d4204SJose E. Roman 351d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 352d71ae5a4SJacob Faibussowitsch { 353d24d4204SJose E. Roman PetscFunctionBegin; 354d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 355d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 3563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 357d24d4204SJose E. Roman } 358d24d4204SJose E. Roman 359d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 360d71ae5a4SJacob Faibussowitsch { 361d24d4204SJose E. Roman PetscFunctionBegin; 362d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 363d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 3643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 365d24d4204SJose E. Roman } 366d24d4204SJose E. Roman 367d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 368d71ae5a4SJacob Faibussowitsch { 369d24d4204SJose E. Roman Mat_Product *product = C->product; 370d24d4204SJose E. Roman 371d24d4204SJose E. Roman PetscFunctionBegin; 372d24d4204SJose E. Roman switch (product->type) { 373d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 374d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 375d71ae5a4SJacob Faibussowitsch break; 376d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 377d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 378d71ae5a4SJacob Faibussowitsch break; 379d71ae5a4SJacob Faibussowitsch default: 380d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 381d24d4204SJose E. Roman } 3823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 383d24d4204SJose E. Roman } 384d24d4204SJose E. Roman 385d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 386d71ae5a4SJacob Faibussowitsch { 387d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 388d24d4204SJose E. Roman PetscScalar *darray, *d2d, v; 389d24d4204SJose E. Roman const PetscInt *ranges; 390d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 391d24d4204SJose E. Roman 392d24d4204SJose E. Roman PetscFunctionBegin; 3939566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray)); 394d24d4204SJose E. Roman 395d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */ 396d24d4204SJose E. Roman 397d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 3989566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 3999566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 400d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 401792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 402d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 403d24d4204SJose E. Roman 404d24d4204SJose E. Roman /* allocate 2d vector */ 405d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 4069566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 407d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 408d24d4204SJose E. Roman 409d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 410792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 411d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 412d24d4204SJose E. Roman 413d24d4204SJose E. Roman /* collect diagonal */ 414d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) { 415792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 416792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 417d24d4204SJose E. Roman } 418d24d4204SJose E. Roman 419d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 420792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 4219566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 422d24d4204SJose E. Roman 423d24d4204SJose E. Roman } else { /* column version */ 424d24d4204SJose E. Roman 425d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4269566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 428d24d4204SJose E. Roman dlld = 1; 429792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 430d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 431d24d4204SJose E. Roman 432d24d4204SJose E. Roman /* allocate 2d vector */ 433d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4349566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 435d24d4204SJose E. Roman 436d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 437792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 438d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 439d24d4204SJose E. Roman 440d24d4204SJose E. Roman /* collect diagonal */ 441d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) { 442792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 443792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 444d24d4204SJose E. Roman } 445d24d4204SJose E. Roman 446d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 447792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 4489566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 449d24d4204SJose E. Roman } 450d24d4204SJose E. Roman 4519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray)); 4529566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4539566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 4543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 455d24d4204SJose E. Roman } 456d24d4204SJose E. Roman 457d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 458d71ae5a4SJacob Faibussowitsch { 459d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 460d24d4204SJose E. Roman const PetscScalar *d; 461d24d4204SJose E. Roman const PetscInt *ranges; 462d24d4204SJose E. Roman PetscScalar *d2d; 463d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 464d24d4204SJose E. Roman 465d24d4204SJose E. Roman PetscFunctionBegin; 466d24d4204SJose E. Roman if (R) { 4679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 468d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 4709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 471d24d4204SJose E. Roman dlld = 1; 472792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 473d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 474d24d4204SJose E. Roman 475d24d4204SJose E. Roman /* allocate 2d vector */ 476d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 4779566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 478d24d4204SJose E. Roman 479d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 480792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 481d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 482d24d4204SJose E. Roman 483d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 484*c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 485d24d4204SJose E. Roman 486d24d4204SJose E. Roman /* broadcast along process columns */ 487d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 488d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 489d24d4204SJose E. Roman 490d24d4204SJose E. Roman /* local scaling */ 4919371c9d4SSatish Balay for (j = 0; j < a->locc; j++) 4929371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 493d24d4204SJose E. Roman 4949566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 4959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 496d24d4204SJose E. Roman } 497d24d4204SJose E. Roman if (L) { 4989566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 499d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 5009566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 5019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 502d24d4204SJose E. Roman dlld = PetscMax(1, A->rmap->n); 503792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 504d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 505d24d4204SJose E. Roman 506d24d4204SJose E. Roman /* allocate 2d vector */ 507d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 5089566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d)); 509d24d4204SJose E. Roman dlld = PetscMax(1, lszd); 510d24d4204SJose E. Roman 511d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 512792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 513d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 514d24d4204SJose E. Roman 515d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 516*c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 517d24d4204SJose E. Roman 518d24d4204SJose E. Roman /* broadcast along process rows */ 519d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 520d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 521d24d4204SJose E. Roman 522d24d4204SJose E. Roman /* local scaling */ 5239371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 5249371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 525d24d4204SJose E. Roman 5269566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 5279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 528d24d4204SJose E. Roman } 5293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 530d24d4204SJose E. Roman } 531d24d4204SJose E. Roman 532d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 533d71ae5a4SJacob Faibussowitsch { 534d24d4204SJose E. Roman PetscFunctionBegin; 535d24d4204SJose E. Roman *missing = PETSC_FALSE; 5363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 537d24d4204SJose E. Roman } 538d24d4204SJose E. Roman 539d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 540d71ae5a4SJacob Faibussowitsch { 541d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 542d24d4204SJose E. Roman PetscBLASInt n, one = 1; 543d24d4204SJose E. Roman 544d24d4204SJose E. Roman PetscFunctionBegin; 545d24d4204SJose E. Roman n = x->lld * x->locc; 546792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 5473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 548d24d4204SJose E. Roman } 549d24d4204SJose E. Roman 550d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 551d71ae5a4SJacob Faibussowitsch { 552d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 553d24d4204SJose E. Roman PetscBLASInt i, n; 554d24d4204SJose E. Roman PetscScalar v; 555d24d4204SJose E. Roman 556d24d4204SJose E. Roman PetscFunctionBegin; 557d24d4204SJose E. Roman n = PetscMin(x->M, x->N); 558d24d4204SJose E. Roman for (i = 1; i <= n; i++) { 559792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 560d24d4204SJose E. Roman v += alpha; 561792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 562d24d4204SJose E. Roman } 5633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 564d24d4204SJose E. Roman } 565d24d4204SJose E. Roman 566d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 567d71ae5a4SJacob Faibussowitsch { 568d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 569d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 570d24d4204SJose E. Roman PetscBLASInt one = 1; 571d24d4204SJose E. Roman PetscScalar beta = 1.0; 572d24d4204SJose E. Roman 573d24d4204SJose E. Roman PetscFunctionBegin; 574d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3); 575792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 5769566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 5773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 578d24d4204SJose E. Roman } 579d24d4204SJose E. Roman 580d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 581d71ae5a4SJacob Faibussowitsch { 582d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 583d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 584d24d4204SJose E. Roman 585d24d4204SJose E. Roman PetscFunctionBegin; 5869566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 5879566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 5883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 589d24d4204SJose E. Roman } 590d24d4204SJose E. Roman 591d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 592d71ae5a4SJacob Faibussowitsch { 593d24d4204SJose E. Roman Mat Bs; 594d24d4204SJose E. Roman MPI_Comm comm; 595d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 596d24d4204SJose E. Roman 597d24d4204SJose E. Roman PetscFunctionBegin; 5989566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5999566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs)); 6009566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 6019566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK)); 602d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 603d24d4204SJose E. Roman b->M = a->M; 604d24d4204SJose E. Roman b->N = a->N; 605d24d4204SJose E. Roman b->mb = a->mb; 606d24d4204SJose E. Roman b->nb = a->nb; 607d24d4204SJose E. Roman b->rsrc = a->rsrc; 608d24d4204SJose E. Roman b->csrc = a->csrc; 6099566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 610d24d4204SJose E. Roman *B = Bs; 61148a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 612d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 614d24d4204SJose E. Roman } 615d24d4204SJose E. Roman 616d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 617d71ae5a4SJacob Faibussowitsch { 618d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 619d24d4204SJose E. Roman Mat Bs = *B; 620d24d4204SJose E. Roman PetscBLASInt one = 1; 621d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 622d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 623d24d4204SJose E. Roman PetscInt i, j; 624d24d4204SJose E. Roman #endif 625d24d4204SJose E. Roman 626d24d4204SJose E. Roman PetscFunctionBegin; 6277fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 6280fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 6299566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 630d24d4204SJose E. Roman *B = Bs; 631d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 632792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 633d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 634d24d4204SJose E. Roman /* undo conjugation */ 6359371c9d4SSatish Balay for (i = 0; i < b->locr; i++) 6369371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 637d24d4204SJose E. Roman #endif 638d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 640d24d4204SJose E. Roman } 641d24d4204SJose E. Roman 642d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 643d71ae5a4SJacob Faibussowitsch { 644d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 645d24d4204SJose E. Roman PetscInt i, j; 646d24d4204SJose E. Roman 647d24d4204SJose E. Roman PetscFunctionBegin; 6489371c9d4SSatish Balay for (i = 0; i < a->locr; i++) 6499371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 6503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 651d24d4204SJose E. Roman } 652d24d4204SJose E. Roman 653d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 654d71ae5a4SJacob Faibussowitsch { 655d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 656d24d4204SJose E. Roman Mat Bs = *B; 657d24d4204SJose E. Roman PetscBLASInt one = 1; 658d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0; 659d24d4204SJose E. Roman 660d24d4204SJose E. Roman PetscFunctionBegin; 6610fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 6629566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 663d24d4204SJose E. Roman *B = Bs; 664d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data; 665792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 666d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 6673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 668d24d4204SJose E. Roman } 669d24d4204SJose E. Roman 670d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 671d71ae5a4SJacob Faibussowitsch { 672d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 673d24d4204SJose E. Roman PetscScalar *x, *x2d; 674d24d4204SJose E. Roman const PetscInt *ranges; 675d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 676d24d4204SJose E. Roman 677d24d4204SJose E. Roman PetscFunctionBegin; 6789566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X)); 6799566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 680d24d4204SJose E. Roman 681d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 6829566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 6839566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 684d24d4204SJose E. Roman xlld = PetscMax(1, A->rmap->n); 685792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 686d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 687d24d4204SJose E. Roman 688d24d4204SJose E. Roman /* allocate 2d vector */ 689d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 6909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d)); 691d24d4204SJose E. Roman xlld = PetscMax(1, lszx); 692d24d4204SJose E. Roman 693d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 694792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 695d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 696d24d4204SJose E. Roman 697d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 698792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 699d24d4204SJose E. Roman 700d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 701d24d4204SJose E. Roman switch (A->factortype) { 702d24d4204SJose E. Roman case MAT_FACTOR_LU: 703792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 704d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 705d24d4204SJose E. Roman break; 706d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 707792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 708d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 709d24d4204SJose E. Roman break; 710d71ae5a4SJacob Faibussowitsch default: 711d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 712d24d4204SJose E. Roman } 713d24d4204SJose E. Roman 714d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 715792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 716d24d4204SJose E. Roman 7179566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 7189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 7193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 720d24d4204SJose E. Roman } 721d24d4204SJose E. Roman 722d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 723d71ae5a4SJacob Faibussowitsch { 724d24d4204SJose E. Roman PetscFunctionBegin; 7259566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X)); 7269566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y)); 7273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 728d24d4204SJose E. Roman } 729d24d4204SJose E. Roman 730d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 731d71ae5a4SJacob Faibussowitsch { 732d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 733d24d4204SJose E. Roman PetscBool flg1, flg2; 734d24d4204SJose E. Roman PetscBLASInt one = 1, info; 735d24d4204SJose E. Roman 736d24d4204SJose E. Roman PetscFunctionBegin; 7379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 7389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 73908401ef6SPierre Jolivet PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 740d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B, 1, X, 2); 741d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)B->data; 742d24d4204SJose E. Roman x = (Mat_ScaLAPACK *)X->data; 7439566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 744d24d4204SJose E. Roman 745d24d4204SJose E. Roman switch (A->factortype) { 746d24d4204SJose E. Roman case MAT_FACTOR_LU: 747792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 748d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info); 749d24d4204SJose E. Roman break; 750d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 751792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 752d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info); 753d24d4204SJose E. Roman break; 754d71ae5a4SJacob Faibussowitsch default: 755d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 756d24d4204SJose E. Roman } 7573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 758d24d4204SJose E. Roman } 759d24d4204SJose E. Roman 760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 761d71ae5a4SJacob Faibussowitsch { 762d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 763d24d4204SJose E. Roman PetscBLASInt one = 1, info; 764d24d4204SJose E. Roman 765d24d4204SJose E. Roman PetscFunctionBegin; 766aa624791SPierre Jolivet if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 767792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 768d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info); 769d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 770d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 771d24d4204SJose E. Roman 7729566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7739566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 7743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 775d24d4204SJose E. Roman } 776d24d4204SJose E. Roman 777d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 778d71ae5a4SJacob Faibussowitsch { 779d24d4204SJose E. Roman PetscFunctionBegin; 7809566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 7819566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 7823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 783d24d4204SJose E. Roman } 784d24d4204SJose E. Roman 785d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 786d71ae5a4SJacob Faibussowitsch { 787d24d4204SJose E. Roman PetscFunctionBegin; 788d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 7893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 790d24d4204SJose E. Roman } 791d24d4204SJose E. Roman 792d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 793d71ae5a4SJacob Faibussowitsch { 794d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 795d24d4204SJose E. Roman PetscBLASInt one = 1, info; 796d24d4204SJose E. Roman 797d24d4204SJose E. Roman PetscFunctionBegin; 798792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 799d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info); 800d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 801d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 802d24d4204SJose E. Roman 8039566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8049566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 8053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 806d24d4204SJose E. Roman } 807d24d4204SJose E. Roman 808d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 809d71ae5a4SJacob Faibussowitsch { 810d24d4204SJose E. Roman PetscFunctionBegin; 8119566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 8129566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 8133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 814d24d4204SJose E. Roman } 815d24d4204SJose E. Roman 816d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 817d71ae5a4SJacob Faibussowitsch { 818d24d4204SJose E. Roman PetscFunctionBegin; 819d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 8203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 821d24d4204SJose E. Roman } 822d24d4204SJose E. Roman 82366976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 824d71ae5a4SJacob Faibussowitsch { 825d24d4204SJose E. Roman PetscFunctionBegin; 826d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 8273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 828d24d4204SJose E. Roman } 829d24d4204SJose E. Roman 830d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 831d71ae5a4SJacob Faibussowitsch { 832d24d4204SJose E. Roman Mat B; 83359172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 834d24d4204SJose E. Roman 835d24d4204SJose E. Roman PetscFunctionBegin; 836d24d4204SJose E. Roman /* Create the factorization matrix */ 8379566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 83866e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 839d24d4204SJose E. Roman B->factortype = ftype; 8409566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8419566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 842d24d4204SJose E. Roman 8439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 844d24d4204SJose E. Roman *F = B; 8453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 846d24d4204SJose E. Roman } 847d24d4204SJose E. Roman 848d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 849d71ae5a4SJacob Faibussowitsch { 850d24d4204SJose E. Roman PetscFunctionBegin; 8519566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 8529566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 8533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 854d24d4204SJose E. Roman } 855d24d4204SJose E. Roman 856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 857d71ae5a4SJacob Faibussowitsch { 858d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 859d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0; 860d24d4204SJose E. Roman const char *ntype; 861d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy; 862d24d4204SJose E. Roman 863d24d4204SJose E. Roman PetscFunctionBegin; 864d24d4204SJose E. Roman switch (type) { 865d24d4204SJose E. Roman case NORM_1: 866d24d4204SJose E. Roman ntype = "1"; 867d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 868d24d4204SJose E. Roman break; 869d24d4204SJose E. Roman case NORM_FROBENIUS: 870d24d4204SJose E. Roman ntype = "F"; 871d24d4204SJose E. Roman work = &dummy; 872d24d4204SJose E. Roman break; 873d24d4204SJose E. Roman case NORM_INFINITY: 874d24d4204SJose E. Roman ntype = "I"; 875d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc); 876d24d4204SJose E. Roman break; 877d71ae5a4SJacob Faibussowitsch default: 878d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 879d24d4204SJose E. Roman } 8809566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 881d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 8829566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 884d24d4204SJose E. Roman } 885d24d4204SJose E. Roman 886d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 887d71ae5a4SJacob Faibussowitsch { 888d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 889d24d4204SJose E. Roman 890d24d4204SJose E. Roman PetscFunctionBegin; 8919566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 8923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 893d24d4204SJose E. Roman } 894d24d4204SJose E. Roman 895d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 896d71ae5a4SJacob Faibussowitsch { 897d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 898d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx; 899d24d4204SJose E. Roman 900d24d4204SJose E. Roman PetscFunctionBegin; 901d24d4204SJose E. Roman if (rows) { 902d24d4204SJose E. Roman n = a->locr; 903d24d4204SJose E. Roman nb = a->mb; 904d24d4204SJose E. Roman isrc = a->rsrc; 905d24d4204SJose E. Roman nproc = a->grid->nprow; 906d24d4204SJose E. Roman iproc = a->grid->myrow; 9079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 908d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9099566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 910d24d4204SJose E. Roman } 911d24d4204SJose E. Roman if (cols) { 912d24d4204SJose E. Roman n = a->locc; 913d24d4204SJose E. Roman nb = a->nb; 914d24d4204SJose E. Roman isrc = a->csrc; 915d24d4204SJose E. Roman nproc = a->grid->npcol; 916d24d4204SJose E. Roman iproc = a->grid->mycol; 9179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx)); 918d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 9199566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 920d24d4204SJose E. Roman } 9213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 922d24d4204SJose E. Roman } 923d24d4204SJose E. Roman 924d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 925d71ae5a4SJacob Faibussowitsch { 926d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 927d24d4204SJose E. Roman Mat Bmpi; 928d24d4204SJose E. Roman MPI_Comm comm; 929a00b6df3SJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 9304b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork; 9314b1a79daSJose E. Roman const PetscScalar *vwork; 932d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 933d24d4204SJose E. Roman PetscScalar *barray; 9344b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE; 9354b1a79daSJose E. Roman PetscMPIInt size; 936d24d4204SJose E. Roman 937d24d4204SJose E. Roman PetscFunctionBegin; 9389566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9399566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 9404b1a79daSJose E. Roman 9414b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9439566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 9449371c9d4SSatish Balay for (i = 0; i < size; i++) 9459371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) { 9469371c9d4SSatish Balay differ = PETSC_TRUE; 9479371c9d4SSatish Balay break; 9489371c9d4SSatish Balay } 9494b1a79daSJose E. Roman } 9504b1a79daSJose E. Roman 9514b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9529566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 9534b1a79daSJose E. Roman m = PETSC_DECIDE; 9549566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 9554b1a79daSJose E. Roman n = PETSC_DECIDE; 9569566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9579566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9589566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 9599566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 9604b1a79daSJose E. Roman 9614b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9629566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 963a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 964a00b6df3SJose E. Roman lld = PetscMax(ldb, 1); /* local leading dimension */ 965792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 9664b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info); 9674b1a79daSJose E. Roman 9684b1a79daSJose E. Roman /* redistribute matrix */ 9699566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 970792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 9719566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 9729566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 9739566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 9744b1a79daSJose E. Roman 9754b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9769566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 9774b1a79daSJose E. Roman for (i = rstart; i < rend; i++) { 9789566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 9799566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 9809566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 9814b1a79daSJose E. Roman } 9829566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 9839566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 9849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 9854b1a79daSJose E. Roman 9864b1a79daSJose E. Roman } else { /* normal cases */ 987d24d4204SJose E. Roman 988d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 989d24d4204SJose E. Roman else { 9909566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 99192c846b4SJose E. Roman m = PETSC_DECIDE; 9929566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 99392c846b4SJose E. Roman n = PETSC_DECIDE; 9949566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 9959566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 9969566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE)); 9979566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 998d24d4204SJose E. Roman } 999d24d4204SJose E. Roman 1000d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 10019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1002a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 1003a00b6df3SJose E. Roman lld = PetscMax(ldb, 1); /* local leading dimension */ 1004792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1005d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1006d24d4204SJose E. Roman 1007d24d4204SJose E. Roman /* redistribute matrix */ 10089566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray)); 1009792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 10109566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1011d24d4204SJose E. Roman 10129566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10139566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1014d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10159566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1016d24d4204SJose E. Roman } else *B = Bmpi; 10174b1a79daSJose E. Roman } 10183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1019d24d4204SJose E. Roman } 1020d24d4204SJose E. Roman 1021d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1022d71ae5a4SJacob Faibussowitsch { 1023b12397e7SPierre Jolivet const PetscInt *ranges; 1024b12397e7SPierre Jolivet PetscMPIInt size; 1025b12397e7SPierre Jolivet PetscInt i, n; 1026b12397e7SPierre Jolivet 1027b12397e7SPierre Jolivet PetscFunctionBegin; 1028b12397e7SPierre Jolivet *correct = PETSC_TRUE; 1029b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1030b12397e7SPierre Jolivet if (size > 1) { 1031b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges)); 1032b12397e7SPierre Jolivet n = ranges[1] - ranges[0]; 10339371c9d4SSatish Balay for (i = 1; i < size; i++) 10349371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break; 1035b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1036b12397e7SPierre Jolivet } 10373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1038b12397e7SPierre Jolivet } 1039b12397e7SPierre Jolivet 1040d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1041d71ae5a4SJacob Faibussowitsch { 1042d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1043d24d4204SJose E. Roman Mat Bmpi; 1044d24d4204SJose E. Roman MPI_Comm comm; 104592c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1046b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols; 1047d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1048*c87776d3SJose E. Roman const PetscScalar *aarray; 1049b12397e7SPierre Jolivet IS ir, ic; 10504e8b52a1SJose E. Roman PetscInt lda; 1051b12397e7SPierre Jolivet PetscBool flg; 1052d24d4204SJose E. Roman 1053d24d4204SJose E. Roman PetscFunctionBegin; 10549566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1055d24d4204SJose E. Roman 1056d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1057d24d4204SJose E. Roman else { 10589566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi)); 105992c846b4SJose E. Roman m = PETSC_DECIDE; 10609566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 106192c846b4SJose E. Roman n = PETSC_DECIDE; 10629566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 10639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 10649566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 10659566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1066d24d4204SJose E. Roman } 1067d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data; 1068d24d4204SJose E. Roman 1069b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda)); 1070*c87776d3SJose E. Roman PetscCall(MatDenseGetArrayRead(A, &aarray)); 1071b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1072b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1073b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1074d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 10759566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 10769566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 10774e8b52a1SJose E. Roman lld = PetscMax(lda, 1); /* local leading dimension */ 1078792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1079d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 1080d24d4204SJose E. Roman 1081d24d4204SJose E. Roman /* redistribute matrix */ 1082792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1083b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1084b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1085b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1086b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1087b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1088b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows)); 1089b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols)); 1090b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1091b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows)); 1092b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols)); 1093b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1094b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1095b12397e7SPierre Jolivet } 1096*c87776d3SJose E. Roman PetscCall(MatDenseRestoreArrayRead(A, &aarray)); 10979566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 10989566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1099d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 11009566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &Bmpi)); 1101d24d4204SJose E. Roman } else *B = Bmpi; 11023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1103d24d4204SJose E. Roman } 1104d24d4204SJose E. Roman 1105d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1106d71ae5a4SJacob Faibussowitsch { 1107d24d4204SJose E. Roman Mat mat_scal; 1108d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1109d24d4204SJose E. Roman const PetscInt *cols; 1110d24d4204SJose E. Roman const PetscScalar *vals; 1111d24d4204SJose E. Roman 1112d24d4204SJose E. Roman PetscFunctionBegin; 1113d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1114d24d4204SJose E. Roman mat_scal = *newmat; 11159566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1116d24d4204SJose E. Roman } else { 11179566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1118d24d4204SJose E. Roman m = PETSC_DECIDE; 11199566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1120d24d4204SJose E. Roman n = PETSC_DECIDE; 11219566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11229566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11239566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11249566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1125d24d4204SJose E. Roman } 1126d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11279566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11289566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 11299566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1130d24d4204SJose E. Roman } 11319566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11329566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1133d24d4204SJose E. Roman 11349566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1135d24d4204SJose E. Roman else *newmat = mat_scal; 11363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1137d24d4204SJose E. Roman } 1138d24d4204SJose E. Roman 1139d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1140d71ae5a4SJacob Faibussowitsch { 1141d24d4204SJose E. Roman Mat mat_scal; 1142d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1143d24d4204SJose E. Roman const PetscInt *cols; 1144d24d4204SJose E. Roman const PetscScalar *vals; 1145d24d4204SJose E. Roman PetscScalar v; 1146d24d4204SJose E. Roman 1147d24d4204SJose E. Roman PetscFunctionBegin; 1148d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1149d24d4204SJose E. Roman mat_scal = *newmat; 11509566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1151d24d4204SJose E. Roman } else { 11529566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1153d24d4204SJose E. Roman m = PETSC_DECIDE; 11549566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1155d24d4204SJose E. Roman n = PETSC_DECIDE; 11569566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 11579566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 11589566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 11599566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1160d24d4204SJose E. Roman } 11619566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1162d24d4204SJose E. Roman for (row = rstart; row < rend; row++) { 11639566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 11649566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1165d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */ 1166d24d4204SJose E. Roman if (cols[j] == row) continue; 1167b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 11689566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1169d24d4204SJose E. Roman } 11709566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1171d24d4204SJose E. Roman } 11729566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 11739566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 11749566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1175d24d4204SJose E. Roman 11769566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1177d24d4204SJose E. Roman else *newmat = mat_scal; 11783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1179d24d4204SJose E. Roman } 1180d24d4204SJose E. Roman 1181d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1182d71ae5a4SJacob Faibussowitsch { 1183d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1184d24d4204SJose E. Roman PetscInt sz = 0; 1185d24d4204SJose E. Roman 1186d24d4204SJose E. Roman PetscFunctionBegin; 11879566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11889566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1189d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1190d24d4204SJose E. Roman 11919566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11929566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 11939566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc)); 1194d24d4204SJose E. Roman 1195d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 11963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1197d24d4204SJose E. Roman } 1198d24d4204SJose E. Roman 1199d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1200d71ae5a4SJacob Faibussowitsch { 1201d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1202d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1203d24d4204SJose E. Roman PetscBool flg; 1204d24d4204SJose E. Roman MPI_Comm icomm; 1205d24d4204SJose E. Roman 1206d24d4204SJose E. Roman PetscFunctionBegin; 12079566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 12089566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12099566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 12109566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 12119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1212d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1213d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1214d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1215d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 12169566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 12179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1218d24d4204SJose E. Roman } 12199566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 12209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 12219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 12229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 12239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 12249566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 12253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1226d24d4204SJose E. Roman } 1227d24d4204SJose E. Roman 122866976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1229d71ae5a4SJacob Faibussowitsch { 1230d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1231d24d4204SJose E. Roman PetscBLASInt info = 0; 1232b12397e7SPierre Jolivet PetscBool flg; 1233d24d4204SJose E. Roman 1234d24d4204SJose E. Roman PetscFunctionBegin; 12359566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12369566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1237d24d4204SJose E. Roman 1238b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1239b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1240b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1241b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1242b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1243d24d4204SJose E. Roman 1244d24d4204SJose E. Roman /* compute local sizes */ 12459566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 12469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1247d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1248d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1249d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr); 1250d24d4204SJose E. Roman 1251d24d4204SJose E. Roman /* allocate local array */ 12529566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1253d24d4204SJose E. Roman 1254d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1255792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1256d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info); 12573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1258d24d4204SJose E. Roman } 1259d24d4204SJose E. Roman 126066976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1261d71ae5a4SJacob Faibussowitsch { 1262d24d4204SJose E. Roman PetscInt nstash, reallocs; 1263d24d4204SJose E. Roman 1264d24d4204SJose E. Roman PetscFunctionBegin; 12653ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 12669566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 12679566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 12689566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 12693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1270d24d4204SJose E. Roman } 1271d24d4204SJose E. Roman 127266976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1273d71ae5a4SJacob Faibussowitsch { 1274d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1275d24d4204SJose E. Roman PetscMPIInt n; 1276d24d4204SJose E. Roman PetscInt i, flg, *row, *col; 1277d24d4204SJose E. Roman PetscScalar *val; 1278d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1279d24d4204SJose E. Roman 1280d24d4204SJose E. Roman PetscFunctionBegin; 12813ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1282d24d4204SJose E. Roman while (1) { 12839566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1284d24d4204SJose E. Roman if (!flg) break; 1285d24d4204SJose E. Roman for (i = 0; i < n; i++) { 12869566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 12879566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1288792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1289aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1290d24d4204SJose E. Roman switch (A->insertmode) { 1291d71ae5a4SJacob Faibussowitsch case INSERT_VALUES: 1292d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1293d71ae5a4SJacob Faibussowitsch break; 1294d71ae5a4SJacob Faibussowitsch case ADD_VALUES: 1295d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1296d71ae5a4SJacob Faibussowitsch break; 1297d71ae5a4SJacob Faibussowitsch default: 1298d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1299d24d4204SJose E. Roman } 1300d24d4204SJose E. Roman } 1301d24d4204SJose E. Roman } 13029566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 13033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1304d24d4204SJose E. Roman } 1305d24d4204SJose E. Roman 130666976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1307d71ae5a4SJacob Faibussowitsch { 1308d24d4204SJose E. Roman Mat Adense, As; 1309d24d4204SJose E. Roman MPI_Comm comm; 1310d24d4204SJose E. Roman 1311d24d4204SJose E. Roman PetscFunctionBegin; 13129566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 13139566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense)); 13149566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE)); 13159566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer)); 13169566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 13179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 13189566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As)); 13193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1320d24d4204SJose E. Roman } 1321d24d4204SJose E. Roman 13229371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1323d24d4204SJose E. Roman 0, 1324d24d4204SJose E. Roman 0, 1325d24d4204SJose E. Roman MatMult_ScaLAPACK, 1326d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1327d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1328d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1329d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1330d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1331d24d4204SJose E. Roman 0, 1332d24d4204SJose E. Roman /*10*/ 0, 1333d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1334d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1335d24d4204SJose E. Roman 0, 1336d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1337d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1340d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1341d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1342d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1343d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1344d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1345d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1346d24d4204SJose E. Roman /*24*/ 0, 1347d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1348d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1349d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1350d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1351d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1352d24d4204SJose E. Roman 0, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman 0, 1355d24d4204SJose E. Roman 0, 1356d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman 0, 1360d24d4204SJose E. Roman 0, 1361d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman 0, 1365d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1366d24d4204SJose E. Roman /*44*/ 0, 1367d24d4204SJose E. Roman MatScale_ScaLAPACK, 1368d24d4204SJose E. Roman MatShift_ScaLAPACK, 1369d24d4204SJose E. Roman 0, 1370d24d4204SJose E. Roman 0, 1371d24d4204SJose E. Roman /*49*/ 0, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman 0, 1374d24d4204SJose E. Roman 0, 1375d24d4204SJose E. Roman 0, 1376d24d4204SJose E. Roman /*54*/ 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman 0, 1379d24d4204SJose E. Roman 0, 1380d24d4204SJose E. Roman 0, 1381d24d4204SJose E. Roman /*59*/ 0, 1382d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1383d24d4204SJose E. Roman MatView_ScaLAPACK, 1384d24d4204SJose E. Roman 0, 1385d24d4204SJose E. Roman 0, 1386d24d4204SJose E. Roman /*64*/ 0, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman 0, 1390d24d4204SJose E. Roman 0, 1391d24d4204SJose E. Roman /*69*/ 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1394d24d4204SJose E. Roman 0, 1395d24d4204SJose E. Roman 0, 1396d24d4204SJose E. Roman /*74*/ 0, 1397d24d4204SJose E. Roman 0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman 0, 1400d24d4204SJose E. Roman 0, 1401d24d4204SJose E. Roman /*79*/ 0, 1402d24d4204SJose E. Roman 0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman 0, 1405d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1406d24d4204SJose E. Roman /*84*/ 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman 0, 1409d24d4204SJose E. Roman 0, 1410d24d4204SJose E. Roman 0, 1411d24d4204SJose E. Roman /*89*/ 0, 1412d24d4204SJose E. Roman 0, 1413d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1414d24d4204SJose E. Roman 0, 1415d24d4204SJose E. Roman 0, 1416d24d4204SJose E. Roman /*94*/ 0, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1420d24d4204SJose E. Roman 0, 1421d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1422d24d4204SJose E. Roman 0, 1423d24d4204SJose E. Roman 0, 1424d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1425d24d4204SJose E. Roman 0, 1426d24d4204SJose E. Roman /*104*/ 0, 1427d24d4204SJose E. Roman 0, 1428d24d4204SJose E. Roman 0, 1429d24d4204SJose E. Roman 0, 1430d24d4204SJose E. Roman 0, 1431d24d4204SJose E. Roman /*109*/ MatMatSolve_ScaLAPACK, 1432d24d4204SJose E. Roman 0, 1433d24d4204SJose E. Roman 0, 1434d24d4204SJose E. Roman 0, 1435d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1436d24d4204SJose E. Roman /*114*/ 0, 1437d24d4204SJose E. Roman 0, 1438d24d4204SJose E. Roman 0, 1439d24d4204SJose E. Roman 0, 1440d24d4204SJose E. Roman 0, 1441d24d4204SJose E. Roman /*119*/ 0, 1442d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1443d24d4204SJose E. Roman 0, 1444d24d4204SJose E. Roman 0, 1445d24d4204SJose E. Roman 0, 1446d24d4204SJose E. Roman /*124*/ 0, 1447d24d4204SJose E. Roman 0, 1448d24d4204SJose E. Roman 0, 1449d24d4204SJose E. Roman 0, 1450d24d4204SJose E. Roman 0, 1451d24d4204SJose E. Roman /*129*/ 0, 1452d24d4204SJose E. Roman 0, 1453d24d4204SJose E. Roman 0, 1454d24d4204SJose E. Roman 0, 1455d24d4204SJose E. Roman 0, 1456d24d4204SJose E. Roman /*134*/ 0, 1457d24d4204SJose E. Roman 0, 1458d24d4204SJose E. Roman 0, 1459d24d4204SJose E. Roman 0, 1460d24d4204SJose E. Roman 0, 1461d24d4204SJose E. Roman 0, 1462d24d4204SJose E. Roman /*140*/ 0, 1463d24d4204SJose E. Roman 0, 1464d24d4204SJose E. Roman 0, 1465d24d4204SJose E. Roman 0, 1466d24d4204SJose E. Roman 0, 1467d24d4204SJose E. Roman /*145*/ 0, 1468d24d4204SJose E. Roman 0, 146999a7f59eSMark Adams 0, 147099a7f59eSMark Adams 0, 14717fb60732SBarry Smith 0, 1472ed6168d8SPierre Jolivet /*150*/ 0, 1473ed6168d8SPierre Jolivet 0}; 1474d24d4204SJose E. Roman 1475d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1476d71ae5a4SJacob Faibussowitsch { 1477d24d4204SJose E. Roman PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1478d24d4204SJose E. Roman PetscInt size = stash->size, nsends; 1479d24d4204SJose E. Roman PetscInt count, *sindices, **rindices, i, j, l; 1480d24d4204SJose E. Roman PetscScalar **rvalues, *svalues; 1481d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1482d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1483d24d4204SJose E. Roman PetscMPIInt *sizes, *nlengths, nreceives; 1484d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy; 1485d24d4204SJose E. Roman PetscScalar *sp_val; 1486d24d4204SJose E. Roman PetscMatStashSpace space, space_next; 1487d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1488d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1489d24d4204SJose E. Roman 1490d24d4204SJose E. Roman PetscFunctionBegin; 1491d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1492d24d4204SJose E. Roman InsertMode addv; 14931c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 149408401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1495d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1496d24d4204SJose E. Roman } 1497d24d4204SJose E. Roman 1498d24d4204SJose E. Roman bs2 = stash->bs * stash->bs; 1499d24d4204SJose E. Roman 1500d24d4204SJose E. Roman /* first count number of contributors to each processor */ 15019566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 15029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1503d24d4204SJose E. Roman 1504d24d4204SJose E. Roman i = j = 0; 1505d24d4204SJose E. Roman space = stash->space_head; 1506d24d4204SJose E. Roman while (space) { 1507d24d4204SJose E. Roman space_next = space->next; 1508d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 15099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 15109566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1511792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1512d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 15139371c9d4SSatish Balay nlengths[j]++; 15149371c9d4SSatish Balay owner[i] = j; 1515d24d4204SJose E. Roman i++; 1516d24d4204SJose E. Roman } 1517d24d4204SJose E. Roman space = space_next; 1518d24d4204SJose E. Roman } 1519d24d4204SJose E. Roman 1520d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 15219566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 1522d24d4204SJose E. Roman for (i = 0, nsends = 0; i < size; i++) { 1523d24d4204SJose E. Roman if (nlengths[i]) { 15249371c9d4SSatish Balay sizes[i] = 1; 15259371c9d4SSatish Balay nsends++; 1526d24d4204SJose E. Roman } 1527d24d4204SJose E. Roman } 1528d24d4204SJose E. Roman 15299371c9d4SSatish Balay { 15309371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 1531d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 15329566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 15339566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1534d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1535d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] *= 2; 15369566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1537d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1538d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 15399566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 15409566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 15419371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 15429371c9d4SSatish Balay } 1543d24d4204SJose E. Roman 1544d24d4204SJose E. Roman /* do sends: 1545d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1546d24d4204SJose E. Roman the ith processor 1547d24d4204SJose E. Roman */ 15489566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 15499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 15509566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1551d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 15529371c9d4SSatish Balay startv[0] = 0; 15539371c9d4SSatish Balay starti[0] = 0; 1554d24d4204SJose E. Roman for (i = 1; i < size; i++) { 1555d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1]; 1556d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1557d24d4204SJose E. Roman } 1558d24d4204SJose E. Roman 1559d24d4204SJose E. Roman i = 0; 1560d24d4204SJose E. Roman space = stash->space_head; 1561d24d4204SJose E. Roman while (space) { 1562d24d4204SJose E. Roman space_next = space->next; 1563d24d4204SJose E. Roman sp_idx = space->idx; 1564d24d4204SJose E. Roman sp_idy = space->idy; 1565d24d4204SJose E. Roman sp_val = space->val; 1566d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) { 1567d24d4204SJose E. Roman j = owner[i]; 1568d24d4204SJose E. Roman if (bs2 == 1) { 1569d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1570d24d4204SJose E. Roman } else { 1571d24d4204SJose E. Roman PetscInt k; 1572d24d4204SJose E. Roman PetscScalar *buf1, *buf2; 1573d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j]; 1574d24d4204SJose E. Roman buf2 = space->val + bs2 * l; 1575d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1576d24d4204SJose E. Roman } 1577d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1578d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1579d24d4204SJose E. Roman startv[j]++; 1580d24d4204SJose E. Roman starti[j]++; 1581d24d4204SJose E. Roman i++; 1582d24d4204SJose E. Roman } 1583d24d4204SJose E. Roman space = space_next; 1584d24d4204SJose E. Roman } 1585d24d4204SJose E. Roman startv[0] = 0; 1586d24d4204SJose E. Roman for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1587d24d4204SJose E. Roman 1588d24d4204SJose E. Roman for (i = 0, count = 0; i < size; i++) { 1589d24d4204SJose E. Roman if (sizes[i]) { 15909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 15919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1592d24d4204SJose E. Roman } 1593d24d4204SJose E. Roman } 1594d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 15959566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1596d24d4204SJose E. Roman for (i = 0; i < size; i++) { 159748a46eb9SPierre Jolivet if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1598d24d4204SJose E. Roman } 1599d24d4204SJose E. Roman #endif 16009566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 16019566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 16029566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 16039566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1604d24d4204SJose E. Roman 1605d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 16069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1607d24d4204SJose E. Roman 1608d24d4204SJose E. Roman for (i = 0; i < nreceives; i++) { 1609d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i]; 1610d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i]; 1611d24d4204SJose E. Roman } 1612d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1613d24d4204SJose E. Roman 16149566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 16159566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1616d24d4204SJose E. Roman 1617d24d4204SJose E. Roman stash->svalues = svalues; 1618d24d4204SJose E. Roman stash->sindices = sindices; 1619d24d4204SJose E. Roman stash->rvalues = rvalues; 1620d24d4204SJose E. Roman stash->rindices = rindices; 1621d24d4204SJose E. Roman stash->send_waits = send_waits; 1622d24d4204SJose E. Roman stash->nsends = nsends; 1623d24d4204SJose E. Roman stash->nrecvs = nreceives; 1624d24d4204SJose E. Roman stash->reproduce_count = 0; 16253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1626d24d4204SJose E. Roman } 1627d24d4204SJose E. Roman 1628d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1629d71ae5a4SJacob Faibussowitsch { 1630d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1631d24d4204SJose E. Roman 1632d24d4204SJose E. Roman PetscFunctionBegin; 163328b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1634aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1635aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 16369566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 16379566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 16383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1639d24d4204SJose E. Roman } 1640d24d4204SJose E. Roman 1641d24d4204SJose E. Roman /*@ 16426aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 164311a5261eSBarry Smith the `MATSCALAPACK` matrix 1644d24d4204SJose E. Roman 1645c3339decSBarry Smith Logically Collective 1646d24d4204SJose E. Roman 1647d8d19677SJose E. Roman Input Parameters: 164811a5261eSBarry Smith + A - a `MATSCALAPACK` matrix 1649d24d4204SJose E. Roman . mb - the row block size 1650d24d4204SJose E. Roman - nb - the column block size 1651d24d4204SJose E. Roman 1652d24d4204SJose E. Roman Level: intermediate 1653d24d4204SJose E. Roman 16542ef1f0ffSBarry Smith Note: 16552ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 16562ef1f0ffSBarry Smith 16571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1658d24d4204SJose E. Roman @*/ 1659d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1660d71ae5a4SJacob Faibussowitsch { 1661d24d4204SJose E. Roman PetscFunctionBegin; 1662d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1663d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2); 1664d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3); 1665cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 16663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1667d24d4204SJose E. Roman } 1668d24d4204SJose E. Roman 1669d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1670d71ae5a4SJacob Faibussowitsch { 1671d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1672d24d4204SJose E. Roman 1673d24d4204SJose E. Roman PetscFunctionBegin; 1674d24d4204SJose E. Roman if (mb) *mb = a->mb; 1675d24d4204SJose E. Roman if (nb) *nb = a->nb; 16763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1677d24d4204SJose E. Roman } 1678d24d4204SJose E. Roman 1679d24d4204SJose E. Roman /*@ 16806aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 168111a5261eSBarry Smith the `MATSCALAPACK` matrix 1682d24d4204SJose E. Roman 16832ef1f0ffSBarry Smith Not Collective 1684d24d4204SJose E. Roman 1685d24d4204SJose E. Roman Input Parameter: 168611a5261eSBarry Smith . A - a `MATSCALAPACK` matrix 1687d24d4204SJose E. Roman 1688d24d4204SJose E. Roman Output Parameters: 1689d24d4204SJose E. Roman + mb - the row block size 1690d24d4204SJose E. Roman - nb - the column block size 1691d24d4204SJose E. Roman 1692d24d4204SJose E. Roman Level: intermediate 1693d24d4204SJose E. Roman 16942ef1f0ffSBarry Smith Note: 16952ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 16962ef1f0ffSBarry Smith 16971cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1698d24d4204SJose E. Roman @*/ 1699d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1700d71ae5a4SJacob Faibussowitsch { 1701d24d4204SJose E. Roman PetscFunctionBegin; 1702d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1703cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 17043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1705d24d4204SJose E. Roman } 1706d24d4204SJose E. Roman 1707d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1708d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1709d24d4204SJose E. Roman 1710d24d4204SJose E. Roman /*MC 1711d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1712d24d4204SJose E. Roman 17132ef1f0ffSBarry Smith Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1714d24d4204SJose E. Roman 1715d24d4204SJose E. Roman Options Database Keys: 17162ef1f0ffSBarry Smith + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 17172ef1f0ffSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1718d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1719d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1720d24d4204SJose E. Roman 17212ef1f0ffSBarry Smith Level: intermediate 17222ef1f0ffSBarry Smith 172389bba20eSBarry Smith Note: 172489bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 172589bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 172689bba20eSBarry Smith the given rank. 172789bba20eSBarry Smith 17281cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1729d24d4204SJose E. Roman M*/ 1730d24d4204SJose E. Roman 1731d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1732d71ae5a4SJacob Faibussowitsch { 1733d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1734d24d4204SJose E. Roman PetscBool flg, flg1; 1735d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1736d24d4204SJose E. Roman MPI_Comm icomm; 1737d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol; 1738d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0}; 1739d24d4204SJose E. Roman PetscMPIInt size; 1740d24d4204SJose E. Roman 1741d24d4204SJose E. Roman PetscFunctionBegin; 1742aea10558SJacob Faibussowitsch A->ops[0] = MatOps_Values; 1743d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1744d24d4204SJose E. Roman 17459566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1746d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1747d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1748d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1749d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1750d24d4204SJose E. Roman 17514dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1752d24d4204SJose E. Roman A->data = (void *)a; 1753d24d4204SJose E. Roman 1754d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1755d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 17569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 17579566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 17589566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1759d24d4204SJose E. Roman } 17609566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 17619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1762d24d4204SJose E. Roman if (!flg) { 17634dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&grid)); 1764d24d4204SJose E. Roman 17659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size)); 1766d24d4204SJose E. Roman grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1767d24d4204SJose E. Roman 1768d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 17699566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1770d24d4204SJose E. Roman if (flg1) { 177108401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1772d24d4204SJose E. Roman grid->nprow = optv1; 1773d24d4204SJose E. Roman } 1774d0609cedSBarry Smith PetscOptionsEnd(); 1775d24d4204SJose E. Roman 1776d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1777d24d4204SJose E. Roman grid->npcol = size / grid->nprow; 17789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 17799566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1780f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1781d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1782d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1783d24d4204SJose E. Roman grid->grid_refct = 1; 1784d24d4204SJose E. Roman grid->nprow = nprow; 1785d24d4204SJose E. Roman grid->npcol = npcol; 1786d24d4204SJose E. Roman grid->myrow = myrow; 1787d24d4204SJose E. Roman grid->mycol = mycol; 1788d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1789f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1790d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1791f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1792d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 17939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1794d24d4204SJose E. Roman 1795d24d4204SJose E. Roman } else grid->grid_refct++; 17969566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1797d24d4204SJose E. Roman a->grid = grid; 1798d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1799d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1800d24d4204SJose E. Roman 1801d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 18029566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1803d24d4204SJose E. Roman if (flg) { 1804d24d4204SJose E. Roman a->mb = array[0]; 1805d24d4204SJose E. Roman a->nb = (k > 1) ? array[1] : a->mb; 1806d24d4204SJose E. Roman } 1807d0609cedSBarry Smith PetscOptionsEnd(); 1808d24d4204SJose E. Roman 1809b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 18109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 18119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 18129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 18139566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 18143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1815d24d4204SJose E. Roman } 1816d24d4204SJose E. Roman 1817d24d4204SJose E. Roman /*@C 1818d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 181911a5261eSBarry Smith (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1820d24d4204SJose E. Roman 1821d24d4204SJose E. Roman Collective 1822d24d4204SJose E. Roman 1823d24d4204SJose E. Roman Input Parameters: 1824d24d4204SJose E. Roman + comm - MPI communicator 182511a5261eSBarry Smith . mb - row block size (or `PETSC_DECIDE` to have it set) 182611a5261eSBarry Smith . nb - column block size (or `PETSC_DECIDE` to have it set) 1827d24d4204SJose E. Roman . M - number of global rows 1828d24d4204SJose E. Roman . N - number of global columns 1829d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1830d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1831d24d4204SJose E. Roman 1832d24d4204SJose E. Roman Output Parameter: 1833d24d4204SJose E. Roman . A - the matrix 1834d24d4204SJose E. Roman 183511a5261eSBarry Smith Options Database Key: 1836d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1837d24d4204SJose E. Roman 18382ef1f0ffSBarry Smith Level: intermediate 18392ef1f0ffSBarry Smith 18402ef1f0ffSBarry Smith Notes: 18412ef1f0ffSBarry Smith If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 18422ef1f0ffSBarry Smith 184311a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1844d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 184511a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1846d24d4204SJose E. Roman 1847d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1848d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1849d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 185011a5261eSBarry Smith used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1851d24d4204SJose E. Roman 18521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1853d24d4204SJose E. Roman @*/ 1854d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1855d71ae5a4SJacob Faibussowitsch { 1856d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1857d24d4204SJose E. Roman PetscInt m, n; 1858d24d4204SJose E. Roman 1859d24d4204SJose E. Roman PetscFunctionBegin; 18609566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 18619566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK)); 1862aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1863d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1864d24d4204SJose E. Roman m = PETSC_DECIDE; 18659566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1866d24d4204SJose E. Roman n = PETSC_DECIDE; 18679566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 18689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1869d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data; 18709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M)); 18719566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N)); 18729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 18739566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 18749566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 18759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 18769566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 18773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1878d24d4204SJose E. Roman } 1879