1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23f7ec113fSDamian Marek { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27f7ec113fSDamian Marek PetscFunctionReturn(0); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 31d24d4204SJose E. Roman { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 39d24d4204SJose E. Roman if (iascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer,&format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc)); 46d24d4204SJose E. Roman PetscFunctionReturn(0); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48d24d4204SJose E. Roman PetscFunctionReturn(0); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense,viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 55d24d4204SJose E. Roman PetscFunctionReturn(0); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 59d24d4204SJose E. Roman { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2],irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld*a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr*a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 72*57168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 76*57168dbeSPierre Jolivet PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 84d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 88d24d4204SJose E. Roman PetscFunctionReturn(0); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 91d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 92d24d4204SJose E. Roman { 93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 94b12397e7SPierre Jolivet 95d24d4204SJose E. Roman PetscFunctionBegin; 96d24d4204SJose E. Roman switch (op) { 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 100d24d4204SJose E. Roman case MAT_SYMMETRIC: 101d24d4204SJose E. Roman case MAT_SORTED_FULL: 102d24d4204SJose E. Roman case MAT_HERMITIAN: 103d24d4204SJose E. Roman break; 104b12397e7SPierre Jolivet case MAT_ROW_ORIENTED: 105b12397e7SPierre Jolivet a->roworiented = flg; 106b12397e7SPierre Jolivet break; 107d24d4204SJose E. Roman default: 10898921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 109d24d4204SJose E. Roman } 110d24d4204SJose E. Roman PetscFunctionReturn(0); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman 113d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 114d24d4204SJose E. Roman { 115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 116d24d4204SJose E. Roman PetscInt i,j; 117d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented; 119d24d4204SJose E. Roman 120d24d4204SJose E. Roman PetscFunctionBegin; 121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 122d24d4204SJose E. Roman for (i=0;i<nr;i++) { 123d24d4204SJose E. Roman if (rows[i] < 0) continue; 1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i]+1,&gridx)); 125d24d4204SJose E. Roman for (j=0;j<nc;j++) { 126d24d4204SJose E. Roman if (cols[j] < 0) continue; 1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx)); 128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 129d24d4204SJose E. Roman if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 130b12397e7SPierre Jolivet if (roworiented) { 131d24d4204SJose E. Roman switch (imode) { 132d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 133b12397e7SPierre Jolivet default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 134b12397e7SPierre Jolivet } 135b12397e7SPierre Jolivet } else { 136b12397e7SPierre Jolivet switch (imode) { 137b12397e7SPierre Jolivet case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break; 138b12397e7SPierre Jolivet default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break; 139b12397e7SPierre Jolivet } 140d24d4204SJose E. Roman } 141d24d4204SJose E. Roman } else { 14228b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 143d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 144b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES))); 145d24d4204SJose E. Roman } 146d24d4204SJose E. Roman } 147d24d4204SJose E. Roman } 148d24d4204SJose E. Roman PetscFunctionReturn(0); 149d24d4204SJose E. Roman } 150d24d4204SJose E. Roman 151d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 152d24d4204SJose E. Roman { 153d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 154d24d4204SJose E. Roman PetscScalar *x2d,*y2d,alpha=1.0; 155d24d4204SJose E. Roman const PetscInt *ranges; 156d24d4204SJose E. Roman PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 157d24d4204SJose E. Roman 158d24d4204SJose E. Roman PetscFunctionBegin; 159d24d4204SJose E. Roman if (transpose) { 160d24d4204SJose E. Roman 161d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1629566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 1639566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 164d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 165792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 166d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1679566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 1689566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* y block size */ 169d24d4204SJose E. Roman ylld = 1; 170792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 171d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 172d24d4204SJose E. Roman 173d24d4204SJose E. Roman /* allocate 2d vectors */ 174d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 175d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1769566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 177d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 178d24d4204SJose E. Roman 179d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 180792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 181d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 182792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 183d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 184d24d4204SJose E. Roman 185d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 186792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 187d24d4204SJose E. Roman 188d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 189792fecdfSBarry Smith if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 190d24d4204SJose E. Roman 191d24d4204SJose E. Roman /* call PBLAS subroutine */ 192792fecdfSBarry Smith PetscCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 193d24d4204SJose E. Roman 194d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 195792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 196d24d4204SJose E. Roman 197d24d4204SJose E. Roman } else { /* non-transpose */ 198d24d4204SJose E. Roman 199d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 2009566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 2019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* x block size */ 202d24d4204SJose E. Roman xlld = 1; 203792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 204d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 2059566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 2069566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* y block size */ 207d24d4204SJose E. Roman ylld = PetscMax(1,A->rmap->n); 208792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 209d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 210d24d4204SJose E. Roman 211d24d4204SJose E. Roman /* allocate 2d vectors */ 212d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 213d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 2149566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 215d24d4204SJose E. Roman ylld = PetscMax(1,lszy); 216d24d4204SJose E. Roman 217d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 218792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 219d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 220792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 221d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 222d24d4204SJose E. Roman 223d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 224792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 225d24d4204SJose E. Roman 226d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 227792fecdfSBarry Smith if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 228d24d4204SJose E. Roman 229d24d4204SJose E. Roman /* call PBLAS subroutine */ 230792fecdfSBarry Smith PetscCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 231d24d4204SJose E. Roman 232d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 233792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 234d24d4204SJose E. Roman 235d24d4204SJose E. Roman } 2369566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d,y2d)); 237d24d4204SJose E. Roman PetscFunctionReturn(0); 238d24d4204SJose E. Roman } 239d24d4204SJose E. Roman 240d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 241d24d4204SJose E. Roman { 242d24d4204SJose E. Roman const PetscScalar *xarray; 243d24d4204SJose E. Roman PetscScalar *yarray; 244d24d4204SJose E. Roman 245d24d4204SJose E. Roman PetscFunctionBegin; 2469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2479566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yarray)); 2489566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray)); 2499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yarray)); 251d24d4204SJose E. Roman PetscFunctionReturn(0); 252d24d4204SJose E. Roman } 253d24d4204SJose E. Roman 254d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 255d24d4204SJose E. Roman { 256d24d4204SJose E. Roman const PetscScalar *xarray; 257d24d4204SJose E. Roman PetscScalar *yarray; 258d24d4204SJose E. Roman 259d24d4204SJose E. Roman PetscFunctionBegin; 2609566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2619566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yarray)); 2629566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray)); 2639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yarray)); 265d24d4204SJose E. Roman PetscFunctionReturn(0); 266d24d4204SJose E. Roman } 267d24d4204SJose E. Roman 268d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 269d24d4204SJose E. Roman { 270d24d4204SJose E. Roman const PetscScalar *xarray; 271d24d4204SJose E. Roman PetscScalar *zarray; 272d24d4204SJose E. Roman 273d24d4204SJose E. Roman PetscFunctionBegin; 2749566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y,z)); 2759566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2769566063dSJacob Faibussowitsch PetscCall(VecGetArray(z,&zarray)); 2779566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray)); 2789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2799566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z,&zarray)); 280d24d4204SJose E. Roman PetscFunctionReturn(0); 281d24d4204SJose E. Roman } 282d24d4204SJose E. Roman 283d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 284d24d4204SJose E. Roman { 285d24d4204SJose E. Roman const PetscScalar *xarray; 286d24d4204SJose E. Roman PetscScalar *zarray; 287d24d4204SJose E. Roman 288d24d4204SJose E. Roman PetscFunctionBegin; 2899566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y,z)); 2909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2919566063dSJacob Faibussowitsch PetscCall(VecGetArray(z,&zarray)); 2929566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray)); 2939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z,&zarray)); 295d24d4204SJose E. Roman PetscFunctionReturn(0); 296d24d4204SJose E. Roman } 297d24d4204SJose E. Roman 298d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 299d24d4204SJose E. Roman { 300d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 301d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 302d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 303d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 304d24d4204SJose E. Roman PetscBLASInt one=1; 305d24d4204SJose E. Roman 306d24d4204SJose E. Roman PetscFunctionBegin; 307792fecdfSBarry Smith PetscCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 308d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 309d24d4204SJose E. Roman PetscFunctionReturn(0); 310d24d4204SJose E. Roman } 311d24d4204SJose E. Roman 312d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 313d24d4204SJose E. Roman { 314d24d4204SJose E. Roman PetscFunctionBegin; 3159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 3169566063dSJacob Faibussowitsch PetscCall(MatSetType(C,MATSCALAPACK)); 3179566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 318d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 319d24d4204SJose E. Roman PetscFunctionReturn(0); 320d24d4204SJose E. Roman } 321d24d4204SJose E. Roman 322d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 323d24d4204SJose E. Roman { 324d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 325d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 326d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 327d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 328d24d4204SJose E. Roman PetscBLASInt one=1; 329d24d4204SJose E. Roman 330d24d4204SJose E. Roman PetscFunctionBegin; 331792fecdfSBarry Smith PetscCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 332d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 333d24d4204SJose E. Roman PetscFunctionReturn(0); 334d24d4204SJose E. Roman } 335d24d4204SJose E. Roman 336d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 337d24d4204SJose E. Roman { 338d24d4204SJose E. Roman PetscFunctionBegin; 3399566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE)); 3409566063dSJacob Faibussowitsch PetscCall(MatSetType(C,MATSCALAPACK)); 3419566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 342d24d4204SJose E. Roman PetscFunctionReturn(0); 343d24d4204SJose E. Roman } 344d24d4204SJose E. Roman 345d24d4204SJose E. Roman /* --------------------------------------- */ 346d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 347d24d4204SJose E. Roman { 348d24d4204SJose E. Roman PetscFunctionBegin; 349d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 350d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 351d24d4204SJose E. Roman PetscFunctionReturn(0); 352d24d4204SJose E. Roman } 353d24d4204SJose E. Roman 354d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 355d24d4204SJose E. Roman { 356d24d4204SJose E. Roman PetscFunctionBegin; 357d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 358d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 359d24d4204SJose E. Roman PetscFunctionReturn(0); 360d24d4204SJose E. Roman } 361d24d4204SJose E. Roman 362d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 363d24d4204SJose E. Roman { 364d24d4204SJose E. Roman Mat_Product *product = C->product; 365d24d4204SJose E. Roman 366d24d4204SJose E. Roman PetscFunctionBegin; 367d24d4204SJose E. Roman switch (product->type) { 368d24d4204SJose E. Roman case MATPRODUCT_AB: 3699566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 370d24d4204SJose E. Roman break; 371d24d4204SJose E. Roman case MATPRODUCT_ABt: 3729566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 373d24d4204SJose E. Roman break; 37498921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 375d24d4204SJose E. Roman } 376d24d4204SJose E. Roman PetscFunctionReturn(0); 377d24d4204SJose E. Roman } 378d24d4204SJose E. Roman /* --------------------------------------- */ 379d24d4204SJose E. Roman 380d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 381d24d4204SJose E. Roman { 382d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 383d24d4204SJose E. Roman PetscScalar *darray,*d2d,v; 384d24d4204SJose E. Roman const PetscInt *ranges; 385d24d4204SJose E. Roman PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 386d24d4204SJose E. Roman 387d24d4204SJose E. Roman PetscFunctionBegin; 3889566063dSJacob Faibussowitsch PetscCall(VecGetArray(D,&darray)); 389d24d4204SJose E. Roman 390d24d4204SJose E. Roman if (A->rmap->N<=A->cmap->N) { /* row version */ 391d24d4204SJose E. Roman 392d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 3939566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 3949566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 395d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 396792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 397d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 398d24d4204SJose E. Roman 399d24d4204SJose E. Roman /* allocate 2d vector */ 400d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 4019566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 402d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 403d24d4204SJose E. Roman 404d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 405792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 406d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 407d24d4204SJose E. Roman 408d24d4204SJose E. Roman /* collect diagonal */ 409d24d4204SJose E. Roman for (j=1;j<=a->M;j++) { 410792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 411792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 412d24d4204SJose E. Roman } 413d24d4204SJose E. Roman 414d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 415792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 4169566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 417d24d4204SJose E. Roman 418d24d4204SJose E. Roman } else { /* column version */ 419d24d4204SJose E. Roman 420d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4219566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 4229566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 423d24d4204SJose E. Roman dlld = 1; 424792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 425d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 426d24d4204SJose E. Roman 427d24d4204SJose E. Roman /* allocate 2d vector */ 428d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 4299566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 430d24d4204SJose E. Roman 431d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 432792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 433d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 434d24d4204SJose E. Roman 435d24d4204SJose E. Roman /* collect diagonal */ 436d24d4204SJose E. Roman for (j=1;j<=a->N;j++) { 437792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 438792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 439d24d4204SJose E. Roman } 440d24d4204SJose E. Roman 441d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 442792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 4439566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 444d24d4204SJose E. Roman } 445d24d4204SJose E. Roman 4469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D,&darray)); 4479566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4489566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 449d24d4204SJose E. Roman PetscFunctionReturn(0); 450d24d4204SJose E. Roman } 451d24d4204SJose E. Roman 452d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 453d24d4204SJose E. Roman { 454d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 455d24d4204SJose E. Roman const PetscScalar *d; 456d24d4204SJose E. Roman const PetscInt *ranges; 457d24d4204SJose E. Roman PetscScalar *d2d; 458d24d4204SJose E. Roman PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 459d24d4204SJose E. Roman 460d24d4204SJose E. Roman PetscFunctionBegin; 461d24d4204SJose E. Roman if (R) { 4629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d)); 463d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4649566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 4659566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 466d24d4204SJose E. Roman dlld = 1; 467792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 468d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 469d24d4204SJose E. Roman 470d24d4204SJose E. Roman /* allocate 2d vector */ 471d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 4729566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 473d24d4204SJose E. Roman 474d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 475792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 476d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 477d24d4204SJose E. Roman 478d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 479792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 480d24d4204SJose E. Roman 481d24d4204SJose E. Roman /* broadcast along process columns */ 482d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 483d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 484d24d4204SJose E. Roman 485d24d4204SJose E. Roman /* local scaling */ 486d24d4204SJose E. Roman for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 487d24d4204SJose E. Roman 4889566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 4899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d)); 490d24d4204SJose E. Roman } 491d24d4204SJose E. Roman if (L) { 4929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d)); 493d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4949566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 4959566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 496d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 497792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 498d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 499d24d4204SJose E. Roman 500d24d4204SJose E. Roman /* allocate 2d vector */ 501d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 5029566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 503d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 504d24d4204SJose E. Roman 505d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 506792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 507d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 508d24d4204SJose E. Roman 509d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 510792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 511d24d4204SJose E. Roman 512d24d4204SJose E. Roman /* broadcast along process rows */ 513d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 514d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 515d24d4204SJose E. Roman 516d24d4204SJose E. Roman /* local scaling */ 517d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 518d24d4204SJose E. Roman 5199566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 5209566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d)); 521d24d4204SJose E. Roman } 522d24d4204SJose E. Roman PetscFunctionReturn(0); 523d24d4204SJose E. Roman } 524d24d4204SJose E. Roman 525d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 526d24d4204SJose E. Roman { 527d24d4204SJose E. Roman PetscFunctionBegin; 528d24d4204SJose E. Roman *missing = PETSC_FALSE; 529d24d4204SJose E. Roman PetscFunctionReturn(0); 530d24d4204SJose E. Roman } 531d24d4204SJose E. Roman 532d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 533d24d4204SJose E. Roman { 534d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 535d24d4204SJose E. Roman PetscBLASInt n,one=1; 536d24d4204SJose E. Roman 537d24d4204SJose E. Roman PetscFunctionBegin; 538d24d4204SJose E. Roman n = x->lld*x->locc; 539792fecdfSBarry Smith PetscCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 540d24d4204SJose E. Roman PetscFunctionReturn(0); 541d24d4204SJose E. Roman } 542d24d4204SJose E. Roman 543d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 544d24d4204SJose E. Roman { 545d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 546d24d4204SJose E. Roman PetscBLASInt i,n; 547d24d4204SJose E. Roman PetscScalar v; 548d24d4204SJose E. Roman 549d24d4204SJose E. Roman PetscFunctionBegin; 550d24d4204SJose E. Roman n = PetscMin(x->M,x->N); 551d24d4204SJose E. Roman for (i=1;i<=n;i++) { 552792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 553d24d4204SJose E. Roman v += alpha; 554792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 555d24d4204SJose E. Roman } 556d24d4204SJose E. Roman PetscFunctionReturn(0); 557d24d4204SJose E. Roman } 558d24d4204SJose E. Roman 559d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 560d24d4204SJose E. Roman { 561d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 562d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 563d24d4204SJose E. Roman PetscBLASInt one=1; 564d24d4204SJose E. Roman PetscScalar beta=1.0; 565d24d4204SJose E. Roman 566d24d4204SJose E. Roman PetscFunctionBegin; 567d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y,1,X,3); 568792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 5699566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 570d24d4204SJose E. Roman PetscFunctionReturn(0); 571d24d4204SJose E. Roman } 572d24d4204SJose E. Roman 573d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 574d24d4204SJose E. Roman { 575d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 576d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 577d24d4204SJose E. Roman 578d24d4204SJose E. Roman PetscFunctionBegin; 5799566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 5809566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 581d24d4204SJose E. Roman PetscFunctionReturn(0); 582d24d4204SJose E. Roman } 583d24d4204SJose E. Roman 584d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 585d24d4204SJose E. Roman { 586d24d4204SJose E. Roman Mat Bs; 587d24d4204SJose E. Roman MPI_Comm comm; 588d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 589d24d4204SJose E. Roman 590d24d4204SJose E. Roman PetscFunctionBegin; 5919566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5929566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bs)); 5939566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 5949566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs,MATSCALAPACK)); 595d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 596d24d4204SJose E. Roman b->M = a->M; 597d24d4204SJose E. Roman b->N = a->N; 598d24d4204SJose E. Roman b->mb = a->mb; 599d24d4204SJose E. Roman b->nb = a->nb; 600d24d4204SJose E. Roman b->rsrc = a->rsrc; 601d24d4204SJose E. Roman b->csrc = a->csrc; 6029566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 603d24d4204SJose E. Roman *B = Bs; 604d24d4204SJose E. Roman if (op == MAT_COPY_VALUES) { 6059566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 606d24d4204SJose E. Roman } 607d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 608d24d4204SJose E. Roman PetscFunctionReturn(0); 609d24d4204SJose E. Roman } 610d24d4204SJose E. Roman 611d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 612d24d4204SJose E. Roman { 613d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 614d24d4204SJose E. Roman Mat Bs = *B; 615d24d4204SJose E. Roman PetscBLASInt one=1; 616d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 617d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 618d24d4204SJose E. Roman PetscInt i,j; 619d24d4204SJose E. Roman #endif 620d24d4204SJose E. Roman 621d24d4204SJose E. Roman PetscFunctionBegin; 6227fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B)); 623d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6249566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 625d24d4204SJose E. Roman *B = Bs; 626d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 627d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 628792fecdfSBarry Smith PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 629d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 630d24d4204SJose E. Roman /* undo conjugation */ 631d24d4204SJose E. Roman for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 632d24d4204SJose E. Roman #endif 633d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 634d24d4204SJose E. Roman PetscFunctionReturn(0); 635d24d4204SJose E. Roman } 636d24d4204SJose E. Roman 637d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 638d24d4204SJose E. Roman { 639d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 640d24d4204SJose E. Roman PetscInt i,j; 641d24d4204SJose E. Roman 642d24d4204SJose E. Roman PetscFunctionBegin; 643d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 644d24d4204SJose E. Roman PetscFunctionReturn(0); 645d24d4204SJose E. Roman } 646d24d4204SJose E. Roman 647d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 648d24d4204SJose E. Roman { 649d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 650d24d4204SJose E. Roman Mat Bs = *B; 651d24d4204SJose E. Roman PetscBLASInt one=1; 652d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 653d24d4204SJose E. Roman 654d24d4204SJose E. Roman PetscFunctionBegin; 655d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6569566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 657d24d4204SJose E. Roman *B = Bs; 658d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 659d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 660792fecdfSBarry Smith PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 661d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 662d24d4204SJose E. Roman PetscFunctionReturn(0); 663d24d4204SJose E. Roman } 664d24d4204SJose E. Roman 665d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 666d24d4204SJose E. Roman { 667d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 668d24d4204SJose E. Roman PetscScalar *x,*x2d; 669d24d4204SJose E. Roman const PetscInt *ranges; 670d24d4204SJose E. Roman PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 671d24d4204SJose E. Roman 672d24d4204SJose E. Roman PetscFunctionBegin; 6739566063dSJacob Faibussowitsch PetscCall(VecCopy(B,X)); 6749566063dSJacob Faibussowitsch PetscCall(VecGetArray(X,&x)); 675d24d4204SJose E. Roman 676d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 6779566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 6789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 679d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 680792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 681d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 682d24d4204SJose E. Roman 683d24d4204SJose E. Roman /* allocate 2d vector */ 684d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 6859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx,&x2d)); 686d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 687d24d4204SJose E. Roman 688d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 689792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 690d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 691d24d4204SJose E. Roman 692d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 693792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 694d24d4204SJose E. Roman 695d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 696d24d4204SJose E. Roman switch (A->factortype) { 697d24d4204SJose E. Roman case MAT_FACTOR_LU: 698792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 699d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 700d24d4204SJose E. Roman break; 701d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 702792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 703d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 704d24d4204SJose E. Roman break; 705d24d4204SJose E. Roman default: 706d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 707d24d4204SJose E. Roman } 708d24d4204SJose E. Roman 709d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 710792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 711d24d4204SJose E. Roman 7129566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 7139566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X,&x)); 714d24d4204SJose E. Roman PetscFunctionReturn(0); 715d24d4204SJose E. Roman } 716d24d4204SJose E. Roman 717d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 718d24d4204SJose E. Roman { 719d24d4204SJose E. Roman PetscFunctionBegin; 7209566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A,B,X)); 7219566063dSJacob Faibussowitsch PetscCall(VecAXPY(X,1,Y)); 722d24d4204SJose E. Roman PetscFunctionReturn(0); 723d24d4204SJose E. Roman } 724d24d4204SJose E. Roman 725d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 726d24d4204SJose E. Roman { 727d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 728d24d4204SJose E. Roman PetscBool flg1,flg2; 729d24d4204SJose E. Roman PetscBLASInt one=1,info; 730d24d4204SJose E. Roman 731d24d4204SJose E. Roman PetscFunctionBegin; 7329566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1)); 7339566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2)); 73408401ef6SPierre Jolivet PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 735d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B,1,X,2); 736d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)B->data; 737d24d4204SJose E. Roman x = (Mat_ScaLAPACK*)X->data; 7389566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc)); 739d24d4204SJose E. Roman 740d24d4204SJose E. Roman switch (A->factortype) { 741d24d4204SJose E. Roman case MAT_FACTOR_LU: 742792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 743d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 744d24d4204SJose E. Roman break; 745d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 746792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 747d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 748d24d4204SJose E. Roman break; 749d24d4204SJose E. Roman default: 750d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 751d24d4204SJose E. Roman } 752d24d4204SJose E. Roman PetscFunctionReturn(0); 753d24d4204SJose E. Roman } 754d24d4204SJose E. Roman 755d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 756d24d4204SJose E. Roman { 757d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 758d24d4204SJose E. Roman PetscBLASInt one=1,info; 759d24d4204SJose E. Roman 760d24d4204SJose E. Roman PetscFunctionBegin; 761d24d4204SJose E. Roman if (!a->pivots) { 7629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots)); 7639566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt))); 764d24d4204SJose E. Roman } 765792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 766d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf",info); 767d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 768d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 769d24d4204SJose E. Roman 7709566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7719566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 772d24d4204SJose E. Roman PetscFunctionReturn(0); 773d24d4204SJose E. Roman } 774d24d4204SJose E. Roman 775d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 776d24d4204SJose E. Roman { 777d24d4204SJose E. Roman PetscFunctionBegin; 7789566063dSJacob Faibussowitsch PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); 7799566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info)); 780d24d4204SJose E. Roman PetscFunctionReturn(0); 781d24d4204SJose E. Roman } 782d24d4204SJose E. Roman 783d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 784d24d4204SJose E. Roman { 785d24d4204SJose E. Roman PetscFunctionBegin; 786d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 787d24d4204SJose E. Roman PetscFunctionReturn(0); 788d24d4204SJose E. Roman } 789d24d4204SJose E. Roman 790d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 791d24d4204SJose E. Roman { 792d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 793d24d4204SJose E. Roman PetscBLASInt one=1,info; 794d24d4204SJose E. Roman 795d24d4204SJose E. Roman PetscFunctionBegin; 796792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 797d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf",info); 798d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 799d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 800d24d4204SJose E. Roman 8019566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 8029566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 803d24d4204SJose E. Roman PetscFunctionReturn(0); 804d24d4204SJose E. Roman } 805d24d4204SJose E. Roman 806d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 807d24d4204SJose E. Roman { 808d24d4204SJose E. Roman PetscFunctionBegin; 8099566063dSJacob Faibussowitsch PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); 8109566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info)); 811d24d4204SJose E. Roman PetscFunctionReturn(0); 812d24d4204SJose E. Roman } 813d24d4204SJose E. Roman 814d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 815d24d4204SJose E. Roman { 816d24d4204SJose E. Roman PetscFunctionBegin; 817d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 818d24d4204SJose E. Roman PetscFunctionReturn(0); 819d24d4204SJose E. Roman } 820d24d4204SJose E. Roman 821d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 822d24d4204SJose E. Roman { 823d24d4204SJose E. Roman PetscFunctionBegin; 824d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 825d24d4204SJose E. Roman PetscFunctionReturn(0); 826d24d4204SJose E. Roman } 827d24d4204SJose E. Roman 828d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 829d24d4204SJose E. Roman { 830d24d4204SJose E. Roman Mat B; 83159172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 832d24d4204SJose E. Roman 833d24d4204SJose E. Roman PetscFunctionBegin; 834d24d4204SJose E. Roman /* Create the factorization matrix */ 8359566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B)); 83666e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 837d24d4204SJose E. Roman B->factortype = ftype; 8389566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8399566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype)); 840d24d4204SJose E. Roman 8419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack)); 842d24d4204SJose E. Roman *F = B; 843d24d4204SJose E. Roman PetscFunctionReturn(0); 844d24d4204SJose E. Roman } 845d24d4204SJose E. Roman 846d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 847d24d4204SJose E. Roman { 848d24d4204SJose E. Roman PetscFunctionBegin; 8499566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack)); 8509566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack)); 851d24d4204SJose E. Roman PetscFunctionReturn(0); 852d24d4204SJose E. Roman } 853d24d4204SJose E. Roman 854d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 855d24d4204SJose E. Roman { 856d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 857d24d4204SJose E. Roman PetscBLASInt one=1,lwork=0; 858d24d4204SJose E. Roman const char *ntype; 859d68f4f38SPierre Jolivet PetscScalar *work=NULL,dummy; 860d24d4204SJose E. Roman 861d24d4204SJose E. Roman PetscFunctionBegin; 862d24d4204SJose E. Roman switch (type) { 863d24d4204SJose E. Roman case NORM_1: 864d24d4204SJose E. Roman ntype = "1"; 865d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 866d24d4204SJose E. Roman break; 867d24d4204SJose E. Roman case NORM_FROBENIUS: 868d24d4204SJose E. Roman ntype = "F"; 869d24d4204SJose E. Roman work = &dummy; 870d24d4204SJose E. Roman break; 871d24d4204SJose E. Roman case NORM_INFINITY: 872d24d4204SJose E. Roman ntype = "I"; 873d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 874d24d4204SJose E. Roman break; 875d24d4204SJose E. Roman default: 876d24d4204SJose E. Roman SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 877d24d4204SJose E. Roman } 8789566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork,&work)); 879d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 8809566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 881d24d4204SJose E. Roman PetscFunctionReturn(0); 882d24d4204SJose E. Roman } 883d24d4204SJose E. Roman 884d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 885d24d4204SJose E. Roman { 886d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 887d24d4204SJose E. Roman 888d24d4204SJose E. Roman PetscFunctionBegin; 8899566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc,a->lld*a->locc)); 890d24d4204SJose E. Roman PetscFunctionReturn(0); 891d24d4204SJose E. Roman } 892d24d4204SJose E. Roman 893d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 894d24d4204SJose E. Roman { 895d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 896d24d4204SJose E. Roman PetscInt i,n,nb,isrc,nproc,iproc,*idx; 897d24d4204SJose E. Roman 898d24d4204SJose E. Roman PetscFunctionBegin; 899d24d4204SJose E. Roman if (rows) { 900d24d4204SJose E. Roman n = a->locr; 901d24d4204SJose E. Roman nb = a->mb; 902d24d4204SJose E. Roman isrc = a->rsrc; 903d24d4204SJose E. Roman nproc = a->grid->nprow; 904d24d4204SJose E. Roman iproc = a->grid->myrow; 9059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&idx)); 906d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 9079566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows)); 908d24d4204SJose E. Roman } 909d24d4204SJose E. Roman if (cols) { 910d24d4204SJose E. Roman n = a->locc; 911d24d4204SJose E. Roman nb = a->nb; 912d24d4204SJose E. Roman isrc = a->csrc; 913d24d4204SJose E. Roman nproc = a->grid->npcol; 914d24d4204SJose E. Roman iproc = a->grid->mycol; 9159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&idx)); 916d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 9179566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols)); 918d24d4204SJose E. Roman } 919d24d4204SJose E. Roman PetscFunctionReturn(0); 920d24d4204SJose E. Roman } 921d24d4204SJose E. Roman 922d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 923d24d4204SJose E. Roman { 924d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 925d24d4204SJose E. Roman Mat Bmpi; 926d24d4204SJose E. Roman MPI_Comm comm; 9274b1a79daSJose E. Roman PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 9284b1a79daSJose E. Roman const PetscInt *ranges,*branges,*cwork; 9294b1a79daSJose E. Roman const PetscScalar *vwork; 930d24d4204SJose E. Roman PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 931d24d4204SJose E. Roman PetscScalar *barray; 9324b1a79daSJose E. Roman PetscBool differ=PETSC_FALSE; 9334b1a79daSJose E. Roman PetscMPIInt size; 934d24d4204SJose E. Roman 935d24d4204SJose E. Roman PetscFunctionBegin; 9369566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 9379566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 9384b1a79daSJose E. Roman 9394b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 9419566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges)); 9424b1a79daSJose E. Roman for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 9434b1a79daSJose E. Roman } 9444b1a79daSJose E. Roman 9454b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9469566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bmpi)); 9474b1a79daSJose E. Roman m = PETSC_DECIDE; 9489566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 9494b1a79daSJose E. Roman n = PETSC_DECIDE; 9509566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 9519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 9529566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi,MATDENSE)); 9539566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 9544b1a79daSJose E. Roman 9554b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9569566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 9574b1a79daSJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 958792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 9594b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit",info); 9604b1a79daSJose E. Roman 9614b1a79daSJose E. Roman /* redistribute matrix */ 9629566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi,&barray)); 963792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 9649566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi,&barray)); 9659566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 9669566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 9674b1a79daSJose E. Roman 9684b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9699566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend)); 9704b1a79daSJose E. Roman for (i=rstart;i<rend;i++) { 9719566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork)); 9729566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES)); 9739566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork)); 9744b1a79daSJose E. Roman } 9759566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY)); 9769566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY)); 9779566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 9784b1a79daSJose E. Roman 9794b1a79daSJose E. Roman } else { /* normal cases */ 980d24d4204SJose E. Roman 981d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 982d24d4204SJose E. Roman else { 9839566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bmpi)); 98492c846b4SJose E. Roman m = PETSC_DECIDE; 9859566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 98692c846b4SJose E. Roman n = PETSC_DECIDE; 9879566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 9889566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 9899566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi,MATDENSE)); 9909566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 991d24d4204SJose E. Roman } 992d24d4204SJose E. Roman 993d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9949566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 995d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 996792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 997d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 998d24d4204SJose E. Roman 999d24d4204SJose E. Roman /* redistribute matrix */ 10009566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi,&barray)); 1001792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 10029566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi,&barray)); 1003d24d4204SJose E. Roman 10049566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 10059566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1006d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10079566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&Bmpi)); 1008d24d4204SJose E. Roman } else *B = Bmpi; 10094b1a79daSJose E. Roman } 1010d24d4204SJose E. Roman PetscFunctionReturn(0); 1011d24d4204SJose E. Roman } 1012d24d4204SJose E. Roman 1013b12397e7SPierre Jolivet static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct) 1014b12397e7SPierre Jolivet { 1015b12397e7SPierre Jolivet const PetscInt *ranges; 1016b12397e7SPierre Jolivet PetscMPIInt size; 1017b12397e7SPierre Jolivet PetscInt i,n; 1018b12397e7SPierre Jolivet 1019b12397e7SPierre Jolivet PetscFunctionBegin; 1020b12397e7SPierre Jolivet *correct = PETSC_TRUE; 1021b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm,&size)); 1022b12397e7SPierre Jolivet if (size>1) { 1023b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map,&ranges)); 1024b12397e7SPierre Jolivet n = ranges[1]-ranges[0]; 1025b12397e7SPierre Jolivet for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break; 1026b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n)); 1027b12397e7SPierre Jolivet } 1028b12397e7SPierre Jolivet PetscFunctionReturn(0); 1029b12397e7SPierre Jolivet } 1030b12397e7SPierre Jolivet 1031d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1032d24d4204SJose E. Roman { 1033d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1034d24d4204SJose E. Roman Mat Bmpi; 1035d24d4204SJose E. Roman MPI_Comm comm; 103692c846b4SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1037b12397e7SPierre Jolivet const PetscInt *ranges,*rows,*cols; 1038d24d4204SJose E. Roman PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1039d24d4204SJose E. Roman PetscScalar *aarray; 1040b12397e7SPierre Jolivet IS ir,ic; 10414e8b52a1SJose E. Roman PetscInt lda; 1042b12397e7SPierre Jolivet PetscBool flg; 1043d24d4204SJose E. Roman 1044d24d4204SJose E. Roman PetscFunctionBegin; 10459566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1046d24d4204SJose E. Roman 1047d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1048d24d4204SJose E. Roman else { 10499566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bmpi)); 105092c846b4SJose E. Roman m = PETSC_DECIDE; 10519566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 105292c846b4SJose E. Roman n = PETSC_DECIDE; 10539566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 10549566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 10559566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi,MATSCALAPACK)); 10569566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1057d24d4204SJose E. Roman } 1058d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bmpi->data; 1059d24d4204SJose E. Roman 1060b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A,&lda)); 1061b12397e7SPierre Jolivet PetscCall(MatDenseGetArray(A,&aarray)); 1062b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg)); 1063b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg)); 1064b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1065d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 10669566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 10679566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&amb)); /* row block size */ 10684e8b52a1SJose E. Roman lld = PetscMax(lda,1); /* local leading dimension */ 1069792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1070d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1071d24d4204SJose E. Roman 1072d24d4204SJose E. Roman /* redistribute matrix */ 1073792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1074b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE; 1075b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1076b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")",lda,A->rmap->n); 1077b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE; 1078b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A,&ir,&ic)); 1079b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir,&rows)); 1080b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic,&cols)); 1081b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES)); 1082b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir,&rows)); 1083b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic,&cols)); 1084b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic)); 1085b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir)); 1086b12397e7SPierre Jolivet } 10879566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A,&aarray)); 10889566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 10899566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1090d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10919566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&Bmpi)); 1092d24d4204SJose E. Roman } else *B = Bmpi; 1093d24d4204SJose E. Roman PetscFunctionReturn(0); 1094d24d4204SJose E. Roman } 1095d24d4204SJose E. Roman 1096d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1097d24d4204SJose E. Roman { 1098d24d4204SJose E. Roman Mat mat_scal; 1099d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1100d24d4204SJose E. Roman const PetscInt *cols; 1101d24d4204SJose E. Roman const PetscScalar *vals; 1102d24d4204SJose E. Roman 1103d24d4204SJose E. Roman PetscFunctionBegin; 1104d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1105d24d4204SJose E. Roman mat_scal = *newmat; 11069566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1107d24d4204SJose E. Roman } else { 11089566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1109d24d4204SJose E. Roman m = PETSC_DECIDE; 11109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1111d24d4204SJose E. Roman n = PETSC_DECIDE; 11129566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 11139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal,m,n,M,N)); 11149566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal,MATSCALAPACK)); 11159566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1116d24d4204SJose E. Roman } 1117d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 11189566063dSJacob Faibussowitsch PetscCall(MatGetRow(A,row,&ncols,&cols,&vals)); 11199566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES)); 11209566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1121d24d4204SJose E. Roman } 11229566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 11239566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1124d24d4204SJose E. Roman 11259566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); 1126d24d4204SJose E. Roman else *newmat = mat_scal; 1127d24d4204SJose E. Roman PetscFunctionReturn(0); 1128d24d4204SJose E. Roman } 1129d24d4204SJose E. Roman 1130d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1131d24d4204SJose E. Roman { 1132d24d4204SJose E. Roman Mat mat_scal; 1133d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1134d24d4204SJose E. Roman const PetscInt *cols; 1135d24d4204SJose E. Roman const PetscScalar *vals; 1136d24d4204SJose E. Roman PetscScalar v; 1137d24d4204SJose E. Roman 1138d24d4204SJose E. Roman PetscFunctionBegin; 1139d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1140d24d4204SJose E. Roman mat_scal = *newmat; 11419566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1142d24d4204SJose E. Roman } else { 11439566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1144d24d4204SJose E. Roman m = PETSC_DECIDE; 11459566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1146d24d4204SJose E. Roman n = PETSC_DECIDE; 11479566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 11489566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal,m,n,M,N)); 11499566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal,MATSCALAPACK)); 11509566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1151d24d4204SJose E. Roman } 11529566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1153d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 11549566063dSJacob Faibussowitsch PetscCall(MatGetRow(A,row,&ncols,&cols,&vals)); 11559566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES)); 1156d24d4204SJose E. Roman for (j=0;j<ncols;j++) { /* lower triangular part */ 1157d24d4204SJose E. Roman if (cols[j] == row) continue; 1158b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 11599566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES)); 1160d24d4204SJose E. Roman } 11619566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1162d24d4204SJose E. Roman } 11639566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 11649566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 11659566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1166d24d4204SJose E. Roman 11679566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); 1168d24d4204SJose E. Roman else *newmat = mat_scal; 1169d24d4204SJose E. Roman PetscFunctionReturn(0); 1170d24d4204SJose E. Roman } 1171d24d4204SJose E. Roman 1172d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1173d24d4204SJose E. Roman { 1174d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1175d24d4204SJose E. Roman PetscInt sz=0; 1176d24d4204SJose E. Roman 1177d24d4204SJose E. Roman PetscFunctionBegin; 11789566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11799566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1180d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1181d24d4204SJose E. Roman 11829566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11839566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld,a->locc,&sz)); 11849566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz,&a->loc)); 11859566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar))); 1186d24d4204SJose E. Roman 1187d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1188d24d4204SJose E. Roman PetscFunctionReturn(0); 1189d24d4204SJose E. Roman } 1190d24d4204SJose E. Roman 1191d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1192d24d4204SJose E. Roman { 1193d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1194d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1195d24d4204SJose E. Roman PetscBool flg; 1196d24d4204SJose E. Roman MPI_Comm icomm; 1197d24d4204SJose E. Roman 1198d24d4204SJose E. Roman PetscFunctionBegin; 11999566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 12009566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 12019566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 12029566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 12039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1204d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1205d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1206d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1207d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 12089566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 12099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval)); 1210d24d4204SJose E. Roman } 12119566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 12129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL)); 12139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 12149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL)); 12159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL)); 12169566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1217d24d4204SJose E. Roman PetscFunctionReturn(0); 1218d24d4204SJose E. Roman } 1219d24d4204SJose E. Roman 1220d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1221d24d4204SJose E. Roman { 1222d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1223d24d4204SJose E. Roman PetscBLASInt info=0; 1224b12397e7SPierre Jolivet PetscBool flg; 1225d24d4204SJose E. Roman 1226d24d4204SJose E. Roman PetscFunctionBegin; 12279566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12289566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1229d24d4204SJose E. Roman 1230b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1231b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg)); 1232b12397e7SPierre Jolivet PetscCheck(flg,A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1233b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg)); 1234b12397e7SPierre Jolivet PetscCheck(flg,A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1235d24d4204SJose E. Roman 1236d24d4204SJose E. Roman /* compute local sizes */ 12379566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N,&a->M)); 12389566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N,&a->N)); 1239d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1240d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1241d24d4204SJose E. Roman a->lld = PetscMax(1,a->locr); 1242d24d4204SJose E. Roman 1243d24d4204SJose E. Roman /* allocate local array */ 12449566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1245d24d4204SJose E. Roman 1246d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1247792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1248d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1249d24d4204SJose E. Roman PetscFunctionReturn(0); 1250d24d4204SJose E. Roman } 1251d24d4204SJose E. Roman 1252d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1253d24d4204SJose E. Roman { 1254d24d4204SJose E. Roman PetscInt nstash,reallocs; 1255d24d4204SJose E. Roman 1256d24d4204SJose E. Roman PetscFunctionBegin; 1257d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 12589566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL)); 12599566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs)); 12609566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 1261d24d4204SJose E. Roman PetscFunctionReturn(0); 1262d24d4204SJose E. Roman } 1263d24d4204SJose E. Roman 1264d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1265d24d4204SJose E. Roman { 1266d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1267d24d4204SJose E. Roman PetscMPIInt n; 1268d24d4204SJose E. Roman PetscInt i,flg,*row,*col; 1269d24d4204SJose E. Roman PetscScalar *val; 1270d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1271d24d4204SJose E. Roman 1272d24d4204SJose E. Roman PetscFunctionBegin; 1273d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1274d24d4204SJose E. Roman while (1) { 12759566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg)); 1276d24d4204SJose E. Roman if (!flg) break; 1277d24d4204SJose E. Roman for (i=0;i<n;i++) { 12789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i]+1,&gridx)); 12799566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i]+1,&gcidx)); 1280792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1281aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1282d24d4204SJose E. Roman switch (A->insertmode) { 1283d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1284d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 128598921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1286d24d4204SJose E. Roman } 1287d24d4204SJose E. Roman } 1288d24d4204SJose E. Roman } 12899566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 1290d24d4204SJose E. Roman PetscFunctionReturn(0); 1291d24d4204SJose E. Roman } 1292d24d4204SJose E. Roman 1293d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1294d24d4204SJose E. Roman { 1295d24d4204SJose E. Roman Mat Adense,As; 1296d24d4204SJose E. Roman MPI_Comm comm; 1297d24d4204SJose E. Roman 1298d24d4204SJose E. Roman PetscFunctionBegin; 12999566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm)); 13009566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Adense)); 13019566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense,MATDENSE)); 13029566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense,viewer)); 13039566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As)); 13049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 13059566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat,&As)); 1306d24d4204SJose E. Roman PetscFunctionReturn(0); 1307d24d4204SJose E. Roman } 1308d24d4204SJose E. Roman 1309d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 1310d24d4204SJose E. Roman static struct _MatOps MatOps_Values = { 1311d24d4204SJose E. Roman MatSetValues_ScaLAPACK, 1312d24d4204SJose E. Roman 0, 1313d24d4204SJose E. Roman 0, 1314d24d4204SJose E. Roman MatMult_ScaLAPACK, 1315d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1316d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1317d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1318d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1319d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1320d24d4204SJose E. Roman 0, 1321d24d4204SJose E. Roman /*10*/ 0, 1322d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1323d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1324d24d4204SJose E. Roman 0, 1325d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1326d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1329d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1330d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1331d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1332d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1333d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1334d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1335d24d4204SJose E. Roman /*24*/ 0, 1336d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1337d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1338d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1339d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1340d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1341d24d4204SJose E. Roman 0, 1342d24d4204SJose E. Roman 0, 1343d24d4204SJose E. Roman 0, 1344d24d4204SJose E. Roman 0, 1345d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1346d24d4204SJose E. Roman 0, 1347d24d4204SJose E. Roman 0, 1348d24d4204SJose E. Roman 0, 1349d24d4204SJose E. Roman 0, 1350d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1351d24d4204SJose E. Roman 0, 1352d24d4204SJose E. Roman 0, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1355d24d4204SJose E. Roman /*44*/ 0, 1356d24d4204SJose E. Roman MatScale_ScaLAPACK, 1357d24d4204SJose E. Roman MatShift_ScaLAPACK, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman 0, 1360d24d4204SJose E. Roman /*49*/ 0, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman 0, 1365d24d4204SJose E. Roman /*54*/ 0, 1366d24d4204SJose E. Roman 0, 1367d24d4204SJose E. Roman 0, 1368d24d4204SJose E. Roman 0, 1369d24d4204SJose E. Roman 0, 1370d24d4204SJose E. Roman /*59*/ 0, 1371d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1372d24d4204SJose E. Roman MatView_ScaLAPACK, 1373d24d4204SJose E. Roman 0, 1374d24d4204SJose E. Roman 0, 1375d24d4204SJose E. Roman /*64*/ 0, 1376d24d4204SJose E. Roman 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman 0, 1379d24d4204SJose E. Roman 0, 1380d24d4204SJose E. Roman /*69*/ 0, 1381d24d4204SJose E. Roman 0, 1382d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman 0, 1385d24d4204SJose E. Roman /*74*/ 0, 1386d24d4204SJose E. Roman 0, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman 0, 1390d24d4204SJose E. Roman /*79*/ 0, 1391d24d4204SJose E. Roman 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1395d24d4204SJose E. Roman /*84*/ 0, 1396d24d4204SJose E. Roman 0, 1397d24d4204SJose E. Roman 0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman 0, 1400d24d4204SJose E. Roman /*89*/ 0, 1401d24d4204SJose E. Roman 0, 1402d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman 0, 1405d24d4204SJose E. Roman /*94*/ 0, 1406d24d4204SJose E. Roman 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1409d24d4204SJose E. Roman 0, 1410d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1411d24d4204SJose E. Roman 0, 1412d24d4204SJose E. Roman 0, 1413d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1414d24d4204SJose E. Roman 0, 1415d24d4204SJose E. Roman /*104*/0, 1416d24d4204SJose E. Roman 0, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman 0, 1420d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK, 1421d24d4204SJose E. Roman 0, 1422d24d4204SJose E. Roman 0, 1423d24d4204SJose E. Roman 0, 1424d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1425d24d4204SJose E. Roman /*114*/0, 1426d24d4204SJose E. Roman 0, 1427d24d4204SJose E. Roman 0, 1428d24d4204SJose E. Roman 0, 1429d24d4204SJose E. Roman 0, 1430d24d4204SJose E. Roman /*119*/0, 1431d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1432d24d4204SJose E. Roman 0, 1433d24d4204SJose E. Roman 0, 1434d24d4204SJose E. Roman 0, 1435d24d4204SJose E. Roman /*124*/0, 1436d24d4204SJose E. Roman 0, 1437d24d4204SJose E. Roman 0, 1438d24d4204SJose E. Roman 0, 1439d24d4204SJose E. Roman 0, 1440d24d4204SJose E. Roman /*129*/0, 1441d24d4204SJose E. Roman 0, 1442d24d4204SJose E. Roman 0, 1443d24d4204SJose E. Roman 0, 1444d24d4204SJose E. Roman 0, 1445d24d4204SJose E. Roman /*134*/0, 1446d24d4204SJose E. Roman 0, 1447d24d4204SJose E. Roman 0, 1448d24d4204SJose E. Roman 0, 1449d24d4204SJose E. Roman 0, 1450d24d4204SJose E. Roman 0, 1451d24d4204SJose E. Roman /*140*/0, 1452d24d4204SJose E. Roman 0, 1453d24d4204SJose E. Roman 0, 1454d24d4204SJose E. Roman 0, 1455d24d4204SJose E. Roman 0, 1456d24d4204SJose E. Roman /*145*/0, 1457d24d4204SJose E. Roman 0, 145899a7f59eSMark Adams 0, 145999a7f59eSMark Adams 0, 14607fb60732SBarry Smith 0, 14617fb60732SBarry Smith /*150*/0 1462d24d4204SJose E. Roman }; 1463d24d4204SJose E. Roman 1464d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1465d24d4204SJose E. Roman { 1466d24d4204SJose E. Roman PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1467d24d4204SJose E. Roman PetscInt size=stash->size,nsends; 1468d24d4204SJose E. Roman PetscInt count,*sindices,**rindices,i,j,l; 1469d24d4204SJose E. Roman PetscScalar **rvalues,*svalues; 1470d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1471d24d4204SJose E. Roman MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1472d24d4204SJose E. Roman PetscMPIInt *sizes,*nlengths,nreceives; 1473d24d4204SJose E. Roman PetscInt *sp_idx,*sp_idy; 1474d24d4204SJose E. Roman PetscScalar *sp_val; 1475d24d4204SJose E. Roman PetscMatStashSpace space,space_next; 1476d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1477d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1478d24d4204SJose E. Roman 1479d24d4204SJose E. Roman PetscFunctionBegin; 1480d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1481d24d4204SJose E. Roman InsertMode addv; 14821c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 148308401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1484d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1485d24d4204SJose E. Roman } 1486d24d4204SJose E. Roman 1487d24d4204SJose E. Roman bs2 = stash->bs*stash->bs; 1488d24d4204SJose E. Roman 1489d24d4204SJose E. Roman /* first count number of contributors to each processor */ 14909566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size,&nlengths)); 14919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n+1,&owner)); 1492d24d4204SJose E. Roman 1493d24d4204SJose E. Roman i = j = 0; 1494d24d4204SJose E. Roman space = stash->space_head; 1495d24d4204SJose E. Roman while (space) { 1496d24d4204SJose E. Roman space_next = space->next; 1497d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 14989566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx)); 14999566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx)); 1500792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1501d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1502d24d4204SJose E. Roman nlengths[j]++; owner[i] = j; 1503d24d4204SJose E. Roman i++; 1504d24d4204SJose E. Roman } 1505d24d4204SJose E. Roman space = space_next; 1506d24d4204SJose E. Roman } 1507d24d4204SJose E. Roman 1508d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 15099566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size,&sizes)); 1510d24d4204SJose E. Roman for (i=0, nsends=0; i<size; i++) { 1511d24d4204SJose E. Roman if (nlengths[i]) { 1512d24d4204SJose E. Roman sizes[i] = 1; nsends++; 1513d24d4204SJose E. Roman } 1514d24d4204SJose E. Roman } 1515d24d4204SJose E. Roman 1516d24d4204SJose E. Roman {PetscMPIInt *onodes,*olengths; 1517d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 15189566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives)); 15199566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths)); 1520d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1521d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] *=2; 15229566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1)); 1523d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1524d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 15259566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2)); 15269566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 15279566063dSJacob Faibussowitsch PetscCall(PetscFree(olengths));} 1528d24d4204SJose E. Roman 1529d24d4204SJose E. Roman /* do sends: 1530d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1531d24d4204SJose E. Roman the ith processor 1532d24d4204SJose E. Roman */ 15339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices)); 15349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nsends,&send_waits)); 15359566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size,&startv,size,&starti)); 1536d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 1537d24d4204SJose E. Roman startv[0] = 0; starti[0] = 0; 1538d24d4204SJose E. Roman for (i=1; i<size; i++) { 1539d24d4204SJose E. Roman startv[i] = startv[i-1] + nlengths[i-1]; 1540d24d4204SJose E. Roman starti[i] = starti[i-1] + 2*nlengths[i-1]; 1541d24d4204SJose E. Roman } 1542d24d4204SJose E. Roman 1543d24d4204SJose E. Roman i = 0; 1544d24d4204SJose E. Roman space = stash->space_head; 1545d24d4204SJose E. Roman while (space) { 1546d24d4204SJose E. Roman space_next = space->next; 1547d24d4204SJose E. Roman sp_idx = space->idx; 1548d24d4204SJose E. Roman sp_idy = space->idy; 1549d24d4204SJose E. Roman sp_val = space->val; 1550d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1551d24d4204SJose E. Roman j = owner[i]; 1552d24d4204SJose E. Roman if (bs2 == 1) { 1553d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1554d24d4204SJose E. Roman } else { 1555d24d4204SJose E. Roman PetscInt k; 1556d24d4204SJose E. Roman PetscScalar *buf1,*buf2; 1557d24d4204SJose E. Roman buf1 = svalues+bs2*startv[j]; 1558d24d4204SJose E. Roman buf2 = space->val + bs2*l; 1559d24d4204SJose E. Roman for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1560d24d4204SJose E. Roman } 1561d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1562d24d4204SJose E. Roman sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1563d24d4204SJose E. Roman startv[j]++; 1564d24d4204SJose E. Roman starti[j]++; 1565d24d4204SJose E. Roman i++; 1566d24d4204SJose E. Roman } 1567d24d4204SJose E. Roman space = space_next; 1568d24d4204SJose E. Roman } 1569d24d4204SJose E. Roman startv[0] = 0; 1570d24d4204SJose E. Roman for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1571d24d4204SJose E. Roman 1572d24d4204SJose E. Roman for (i=0,count=0; i<size; i++) { 1573d24d4204SJose E. Roman if (sizes[i]) { 15749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++)); 15759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++)); 1576d24d4204SJose E. Roman } 1577d24d4204SJose E. Roman } 1578d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 15799566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends)); 1580d24d4204SJose E. Roman for (i=0; i<size; i++) { 1581d24d4204SJose E. Roman if (sizes[i]) { 15829566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))))); 1583d24d4204SJose E. Roman } 1584d24d4204SJose E. Roman } 1585d24d4204SJose E. Roman #endif 15869566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 15879566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 15889566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv,starti)); 15899566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1590d24d4204SJose E. Roman 1591d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 15929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nreceives,&recv_waits)); 1593d24d4204SJose E. Roman 1594d24d4204SJose E. Roman for (i=0; i<nreceives; i++) { 1595d24d4204SJose E. Roman recv_waits[2*i] = recv_waits1[i]; 1596d24d4204SJose E. Roman recv_waits[2*i+1] = recv_waits2[i]; 1597d24d4204SJose E. Roman } 1598d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1599d24d4204SJose E. Roman 16009566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 16019566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1602d24d4204SJose E. Roman 1603d24d4204SJose E. Roman stash->svalues = svalues; 1604d24d4204SJose E. Roman stash->sindices = sindices; 1605d24d4204SJose E. Roman stash->rvalues = rvalues; 1606d24d4204SJose E. Roman stash->rindices = rindices; 1607d24d4204SJose E. Roman stash->send_waits = send_waits; 1608d24d4204SJose E. Roman stash->nsends = nsends; 1609d24d4204SJose E. Roman stash->nrecvs = nreceives; 1610d24d4204SJose E. Roman stash->reproduce_count = 0; 1611d24d4204SJose E. Roman PetscFunctionReturn(0); 1612d24d4204SJose E. Roman } 1613d24d4204SJose E. Roman 1614d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1615d24d4204SJose E. Roman { 1616d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1617d24d4204SJose E. Roman 1618d24d4204SJose E. Roman PetscFunctionBegin; 161928b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1620aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb); 1621aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb); 16229566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 16239566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1624d24d4204SJose E. Roman PetscFunctionReturn(0); 1625d24d4204SJose E. Roman } 1626d24d4204SJose E. Roman 1627d24d4204SJose E. Roman /*@ 16286aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1629d24d4204SJose E. Roman the ScaLAPACK matrix 1630d24d4204SJose E. Roman 1631d24d4204SJose E. Roman Logically Collective on A 1632d24d4204SJose E. Roman 1633d8d19677SJose E. Roman Input Parameters: 1634d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1635d24d4204SJose E. Roman . mb - the row block size 1636d24d4204SJose E. Roman - nb - the column block size 1637d24d4204SJose E. Roman 1638d24d4204SJose E. Roman Level: intermediate 1639d24d4204SJose E. Roman 1640db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1641d24d4204SJose E. Roman @*/ 1642d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1643d24d4204SJose E. Roman { 1644d24d4204SJose E. Roman PetscFunctionBegin; 1645d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1646d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,mb,2); 1647d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,nb,3); 1648cac4c232SBarry Smith PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb)); 1649d24d4204SJose E. Roman PetscFunctionReturn(0); 1650d24d4204SJose E. Roman } 1651d24d4204SJose E. Roman 1652d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1653d24d4204SJose E. Roman { 1654d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1655d24d4204SJose E. Roman 1656d24d4204SJose E. Roman PetscFunctionBegin; 1657d24d4204SJose E. Roman if (mb) *mb = a->mb; 1658d24d4204SJose E. Roman if (nb) *nb = a->nb; 1659d24d4204SJose E. Roman PetscFunctionReturn(0); 1660d24d4204SJose E. Roman } 1661d24d4204SJose E. Roman 1662d24d4204SJose E. Roman /*@ 16636aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1664d24d4204SJose E. Roman the ScaLAPACK matrix 1665d24d4204SJose E. Roman 1666d24d4204SJose E. Roman Not collective 1667d24d4204SJose E. Roman 1668d24d4204SJose E. Roman Input Parameter: 1669d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1670d24d4204SJose E. Roman 1671d24d4204SJose E. Roman Output Parameters: 1672d24d4204SJose E. Roman + mb - the row block size 1673d24d4204SJose E. Roman - nb - the column block size 1674d24d4204SJose E. Roman 1675d24d4204SJose E. Roman Level: intermediate 1676d24d4204SJose E. Roman 1677db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1678d24d4204SJose E. Roman @*/ 1679d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1680d24d4204SJose E. Roman { 1681d24d4204SJose E. Roman PetscFunctionBegin; 1682d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1683cac4c232SBarry Smith PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb)); 1684d24d4204SJose E. Roman PetscFunctionReturn(0); 1685d24d4204SJose E. Roman } 1686d24d4204SJose E. Roman 1687d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1688d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1689d24d4204SJose E. Roman 1690d24d4204SJose E. Roman /*MC 1691d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1692d24d4204SJose E. Roman 1693d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1694d24d4204SJose E. Roman 1695d24d4204SJose E. Roman Options Database Keys: 1696d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 169789bba20eSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu 1698d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1699d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1700d24d4204SJose E. Roman 170189bba20eSBarry Smith Note: 170289bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 170389bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 170489bba20eSBarry Smith the given rank. 170589bba20eSBarry Smith 1706d24d4204SJose E. Roman Level: beginner 1707d24d4204SJose E. Roman 170889bba20eSBarry Smith .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()` 1709d24d4204SJose E. Roman M*/ 1710d24d4204SJose E. Roman 1711d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1712d24d4204SJose E. Roman { 1713d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1714d24d4204SJose E. Roman PetscBool flg,flg1; 1715d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1716d24d4204SJose E. Roman MPI_Comm icomm; 1717d24d4204SJose E. Roman PetscBLASInt nprow,npcol,myrow,mycol; 1718d24d4204SJose E. Roman PetscInt optv1,k=2,array[2]={0,0}; 1719d24d4204SJose E. Roman PetscMPIInt size; 1720d24d4204SJose E. Roman 1721d24d4204SJose E. Roman PetscFunctionBegin; 17229566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps))); 1723d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1724d24d4204SJose E. Roman 17259566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash)); 1726d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1727d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1728d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1729d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1730d24d4204SJose E. Roman 17319566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&a)); 1732d24d4204SJose E. Roman A->data = (void*)a; 1733d24d4204SJose E. Roman 1734d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1735d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 17369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0)); 17379566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 17389566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite)); 1739d24d4204SJose E. Roman } 17409566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 17419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1742d24d4204SJose E. Roman if (!flg) { 17439566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&grid)); 1744d24d4204SJose E. Roman 17459566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm,&size)); 1746d24d4204SJose E. Roman grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1747d24d4204SJose E. Roman 1748d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat"); 17499566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1)); 1750d24d4204SJose E. Roman if (flg1) { 175108401ef6SPierre Jolivet PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size); 1752d24d4204SJose E. Roman grid->nprow = optv1; 1753d24d4204SJose E. Roman } 1754d0609cedSBarry Smith PetscOptionsEnd(); 1755d24d4204SJose E. Roman 1756d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1757d24d4204SJose E. Roman grid->npcol = size/grid->nprow; 17589566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow,&nprow)); 17599566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol,&npcol)); 1760f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1761d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1762d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1763d24d4204SJose E. Roman grid->grid_refct = 1; 1764d24d4204SJose E. Roman grid->nprow = nprow; 1765d24d4204SJose E. Roman grid->npcol = npcol; 1766d24d4204SJose E. Roman grid->myrow = myrow; 1767d24d4204SJose E. Roman grid->mycol = mycol; 1768d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1769f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1770d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1771f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1772d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol,"R",size,1); 17739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid)); 1774d24d4204SJose E. Roman 1775d24d4204SJose E. Roman } else grid->grid_refct++; 17769566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1777d24d4204SJose E. Roman a->grid = grid; 1778d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1779d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1780d24d4204SJose E. Roman 1781d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat"); 17829566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg)); 1783d24d4204SJose E. Roman if (flg) { 1784d24d4204SJose E. Roman a->mb = array[0]; 1785d24d4204SJose E. Roman a->nb = (k>1)? array[1]: a->mb; 1786d24d4204SJose E. Roman } 1787d0609cedSBarry Smith PetscOptionsEnd(); 1788d24d4204SJose E. Roman 1789b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE; 17909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK)); 17919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK)); 17929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK)); 17939566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK)); 1794d24d4204SJose E. Roman PetscFunctionReturn(0); 1795d24d4204SJose E. Roman } 1796d24d4204SJose E. Roman 1797d24d4204SJose E. Roman /*@C 1798d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1799d24d4204SJose E. Roman (2D block cyclic distribution). 1800d24d4204SJose E. Roman 1801d24d4204SJose E. Roman Collective 1802d24d4204SJose E. Roman 1803d24d4204SJose E. Roman Input Parameters: 1804d24d4204SJose E. Roman + comm - MPI communicator 1805d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1806d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1807d24d4204SJose E. Roman . M - number of global rows 1808d24d4204SJose E. Roman . N - number of global columns 1809d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1810d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1811d24d4204SJose E. Roman 1812d24d4204SJose E. Roman Output Parameter: 1813d24d4204SJose E. Roman . A - the matrix 1814d24d4204SJose E. Roman 1815d24d4204SJose E. Roman Options Database Keys: 1816d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1817d24d4204SJose E. Roman 1818d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1819d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1820d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1821d24d4204SJose E. Roman 1822d24d4204SJose E. Roman Notes: 1823d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1824d24d4204SJose E. Roman is chosen. 1825d24d4204SJose E. Roman 1826d24d4204SJose E. Roman Storage Information: 1827d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1828d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1829d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1830d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1831d24d4204SJose E. Roman 1832d24d4204SJose E. Roman Level: intermediate 1833d24d4204SJose E. Roman 1834db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1835d24d4204SJose E. Roman @*/ 1836d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1837d24d4204SJose E. Roman { 1838d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1839d24d4204SJose E. Roman PetscInt m,n; 1840d24d4204SJose E. Roman 1841d24d4204SJose E. Roman PetscFunctionBegin; 18429566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 18439566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATSCALAPACK)); 1844aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1845d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1846d24d4204SJose E. Roman m = PETSC_DECIDE; 18479566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 1848d24d4204SJose E. Roman n = PETSC_DECIDE; 18499566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 18509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,M,N)); 1851d24d4204SJose E. Roman a = (Mat_ScaLAPACK*)(*A)->data; 18529566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M,&a->M)); 18539566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N,&a->N)); 18549566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 18559566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 18569566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc,&a->rsrc)); 18579566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc,&a->csrc)); 18589566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 1859d24d4204SJose E. Roman PetscFunctionReturn(0); 1860d24d4204SJose E. Roman } 1861