1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23f7ec113fSDamian Marek { 24f7ec113fSDamian Marek PetscFunctionBegin; 259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n")); 269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27f7ec113fSDamian Marek PetscFunctionReturn(0); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 31d24d4204SJose E. Roman { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 39d24d4204SJose E. Roman if (iascii) { 409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer,&format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb)); 439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol)); 449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc)); 459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc)); 46d24d4204SJose E. Roman PetscFunctionReturn(0); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48d24d4204SJose E. Roman PetscFunctionReturn(0); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 529566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense)); 539566063dSJacob Faibussowitsch PetscCall(MatView(Adense,viewer)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 55d24d4204SJose E. Roman PetscFunctionReturn(0); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 59d24d4204SJose E. Roman { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2],irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld*a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr*a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 729566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 769566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 84d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 88d24d4204SJose E. Roman PetscFunctionReturn(0); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 91d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 92d24d4204SJose E. Roman { 93d24d4204SJose E. Roman PetscFunctionBegin; 94d24d4204SJose E. Roman switch (op) { 95d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 96d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 98d24d4204SJose E. Roman case MAT_SYMMETRIC: 99d24d4204SJose E. Roman case MAT_SORTED_FULL: 100d24d4204SJose E. Roman case MAT_HERMITIAN: 101d24d4204SJose E. Roman break; 102d24d4204SJose E. Roman default: 10398921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 104d24d4204SJose E. Roman } 105d24d4204SJose E. Roman PetscFunctionReturn(0); 106d24d4204SJose E. Roman } 107d24d4204SJose E. Roman 108d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 109d24d4204SJose E. Roman { 110d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 111d24d4204SJose E. Roman PetscInt i,j; 112d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 113d24d4204SJose E. Roman 114d24d4204SJose E. Roman PetscFunctionBegin; 115d24d4204SJose E. Roman for (i=0;i<nr;i++) { 116d24d4204SJose E. Roman if (rows[i] < 0) continue; 1179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i]+1,&gridx)); 118d24d4204SJose E. Roman for (j=0;j<nc;j++) { 119d24d4204SJose E. Roman if (cols[j] < 0) continue; 1209566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx)); 121d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 122d24d4204SJose E. Roman if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 123d24d4204SJose E. Roman switch (imode) { 124d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 125d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 12698921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 127d24d4204SJose E. Roman } 128d24d4204SJose E. Roman } else { 12928b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 130d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 1319566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES))); 132d24d4204SJose E. Roman } 133d24d4204SJose E. Roman } 134d24d4204SJose E. Roman } 135d24d4204SJose E. Roman PetscFunctionReturn(0); 136d24d4204SJose E. Roman } 137d24d4204SJose E. Roman 138d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 139d24d4204SJose E. Roman { 140d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 141d24d4204SJose E. Roman PetscScalar *x2d,*y2d,alpha=1.0; 142d24d4204SJose E. Roman const PetscInt *ranges; 143d24d4204SJose E. Roman PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 144d24d4204SJose E. Roman 145d24d4204SJose E. Roman PetscFunctionBegin; 146d24d4204SJose E. Roman if (transpose) { 147d24d4204SJose E. Roman 148d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1499566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 1509566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 151d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 152d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 153d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1549566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 1559566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* y block size */ 156d24d4204SJose E. Roman ylld = 1; 157d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 158d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 159d24d4204SJose E. Roman 160d24d4204SJose E. Roman /* allocate 2d vectors */ 161d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 162d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1639566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 164d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 167d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 168d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 169d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 170d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 171d24d4204SJose E. Roman 172d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 173d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 174d24d4204SJose E. Roman 175d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 176d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 177d24d4204SJose E. Roman 178d24d4204SJose E. Roman /* call PBLAS subroutine */ 179d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 180d24d4204SJose E. Roman 181d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 182d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 183d24d4204SJose E. Roman 184d24d4204SJose E. Roman } else { /* non-transpose */ 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1879566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 1889566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* x block size */ 189d24d4204SJose E. Roman xlld = 1; 190d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 191d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1929566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 1939566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* y block size */ 194d24d4204SJose E. Roman ylld = PetscMax(1,A->rmap->n); 195d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 196d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* allocate 2d vectors */ 199d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 200d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 2019566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 202d24d4204SJose E. Roman ylld = PetscMax(1,lszy); 203d24d4204SJose E. Roman 204d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 205d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 206d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 207d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 208d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 209d24d4204SJose E. Roman 210d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 211d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 212d24d4204SJose E. Roman 213d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 214d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 215d24d4204SJose E. Roman 216d24d4204SJose E. Roman /* call PBLAS subroutine */ 217d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 218d24d4204SJose E. Roman 219d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 220d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 221d24d4204SJose E. Roman 222d24d4204SJose E. Roman } 2239566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d,y2d)); 224d24d4204SJose E. Roman PetscFunctionReturn(0); 225d24d4204SJose E. Roman } 226d24d4204SJose E. Roman 227d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 228d24d4204SJose E. Roman { 229d24d4204SJose E. Roman const PetscScalar *xarray; 230d24d4204SJose E. Roman PetscScalar *yarray; 231d24d4204SJose E. Roman 232d24d4204SJose E. Roman PetscFunctionBegin; 2339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2349566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yarray)); 2359566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray)); 2369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yarray)); 238d24d4204SJose E. Roman PetscFunctionReturn(0); 239d24d4204SJose E. Roman } 240d24d4204SJose E. Roman 241d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 242d24d4204SJose E. Roman { 243d24d4204SJose E. Roman const PetscScalar *xarray; 244d24d4204SJose E. Roman PetscScalar *yarray; 245d24d4204SJose E. Roman 246d24d4204SJose E. Roman PetscFunctionBegin; 2479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2489566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&yarray)); 2499566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray)); 2509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&yarray)); 252d24d4204SJose E. Roman PetscFunctionReturn(0); 253d24d4204SJose E. Roman } 254d24d4204SJose E. Roman 255d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 256d24d4204SJose E. Roman { 257d24d4204SJose E. Roman const PetscScalar *xarray; 258d24d4204SJose E. Roman PetscScalar *zarray; 259d24d4204SJose E. Roman 260d24d4204SJose E. Roman PetscFunctionBegin; 2619566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y,z)); 2629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2639566063dSJacob Faibussowitsch PetscCall(VecGetArray(z,&zarray)); 2649566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray)); 2659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z,&zarray)); 267d24d4204SJose E. Roman PetscFunctionReturn(0); 268d24d4204SJose E. Roman } 269d24d4204SJose E. Roman 270d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 271d24d4204SJose E. Roman { 272d24d4204SJose E. Roman const PetscScalar *xarray; 273d24d4204SJose E. Roman PetscScalar *zarray; 274d24d4204SJose E. Roman 275d24d4204SJose E. Roman PetscFunctionBegin; 2769566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y,z)); 2779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&xarray)); 2789566063dSJacob Faibussowitsch PetscCall(VecGetArray(z,&zarray)); 2799566063dSJacob Faibussowitsch PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray)); 2809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&xarray)); 2819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z,&zarray)); 282d24d4204SJose E. Roman PetscFunctionReturn(0); 283d24d4204SJose E. Roman } 284d24d4204SJose E. Roman 285d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 286d24d4204SJose E. Roman { 287d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 288d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 289d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 290d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 291d24d4204SJose E. Roman PetscBLASInt one=1; 292d24d4204SJose E. Roman 293d24d4204SJose E. Roman PetscFunctionBegin; 294d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 295d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 296d24d4204SJose E. Roman PetscFunctionReturn(0); 297d24d4204SJose E. Roman } 298d24d4204SJose E. Roman 299d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 300d24d4204SJose E. Roman { 301d24d4204SJose E. Roman PetscFunctionBegin; 3029566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 3039566063dSJacob Faibussowitsch PetscCall(MatSetType(C,MATSCALAPACK)); 3049566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 305d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 306d24d4204SJose E. Roman PetscFunctionReturn(0); 307d24d4204SJose E. Roman } 308d24d4204SJose E. Roman 309d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 310d24d4204SJose E. Roman { 311d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 312d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 313d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 314d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 315d24d4204SJose E. Roman PetscBLASInt one=1; 316d24d4204SJose E. Roman 317d24d4204SJose E. Roman PetscFunctionBegin; 318d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 319d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 320d24d4204SJose E. Roman PetscFunctionReturn(0); 321d24d4204SJose E. Roman } 322d24d4204SJose E. Roman 323d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 324d24d4204SJose E. Roman { 325d24d4204SJose E. Roman PetscFunctionBegin; 3269566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE)); 3279566063dSJacob Faibussowitsch PetscCall(MatSetType(C,MATSCALAPACK)); 3289566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 329d24d4204SJose E. Roman PetscFunctionReturn(0); 330d24d4204SJose E. Roman } 331d24d4204SJose E. Roman 332d24d4204SJose E. Roman /* --------------------------------------- */ 333d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 334d24d4204SJose E. Roman { 335d24d4204SJose E. Roman PetscFunctionBegin; 336d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 337d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 338d24d4204SJose E. Roman PetscFunctionReturn(0); 339d24d4204SJose E. Roman } 340d24d4204SJose E. Roman 341d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 342d24d4204SJose E. Roman { 343d24d4204SJose E. Roman PetscFunctionBegin; 344d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 345d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 346d24d4204SJose E. Roman PetscFunctionReturn(0); 347d24d4204SJose E. Roman } 348d24d4204SJose E. Roman 349d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 350d24d4204SJose E. Roman { 351d24d4204SJose E. Roman Mat_Product *product = C->product; 352d24d4204SJose E. Roman 353d24d4204SJose E. Roman PetscFunctionBegin; 354d24d4204SJose E. Roman switch (product->type) { 355d24d4204SJose E. Roman case MATPRODUCT_AB: 3569566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 357d24d4204SJose E. Roman break; 358d24d4204SJose E. Roman case MATPRODUCT_ABt: 3599566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 360d24d4204SJose E. Roman break; 36198921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 362d24d4204SJose E. Roman } 363d24d4204SJose E. Roman PetscFunctionReturn(0); 364d24d4204SJose E. Roman } 365d24d4204SJose E. Roman /* --------------------------------------- */ 366d24d4204SJose E. Roman 367d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 368d24d4204SJose E. Roman { 369d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 370d24d4204SJose E. Roman PetscScalar *darray,*d2d,v; 371d24d4204SJose E. Roman const PetscInt *ranges; 372d24d4204SJose E. Roman PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 373d24d4204SJose E. Roman 374d24d4204SJose E. Roman PetscFunctionBegin; 3759566063dSJacob Faibussowitsch PetscCall(VecGetArray(D,&darray)); 376d24d4204SJose E. Roman 377d24d4204SJose E. Roman if (A->rmap->N<=A->cmap->N) { /* row version */ 378d24d4204SJose E. Roman 379d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 3809566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 3819566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 382d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 383d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 384d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 385d24d4204SJose E. Roman 386d24d4204SJose E. Roman /* allocate 2d vector */ 387d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 3889566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 389d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 390d24d4204SJose E. Roman 391d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 392d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 393d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 394d24d4204SJose E. Roman 395d24d4204SJose E. Roman /* collect diagonal */ 396d24d4204SJose E. Roman for (j=1;j<=a->M;j++) { 397d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 398d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 399d24d4204SJose E. Roman } 400d24d4204SJose E. Roman 401d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 402d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 4039566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 404d24d4204SJose E. Roman 405d24d4204SJose E. Roman } else { /* column version */ 406d24d4204SJose E. Roman 407d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4089566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 4099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 410d24d4204SJose E. Roman dlld = 1; 411d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 412d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 413d24d4204SJose E. Roman 414d24d4204SJose E. Roman /* allocate 2d vector */ 415d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 4169566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 417d24d4204SJose E. Roman 418d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 419d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 420d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 421d24d4204SJose E. Roman 422d24d4204SJose E. Roman /* collect diagonal */ 423d24d4204SJose E. Roman for (j=1;j<=a->N;j++) { 424d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 425d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 426d24d4204SJose E. Roman } 427d24d4204SJose E. Roman 428d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 429d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 4309566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 431d24d4204SJose E. Roman } 432d24d4204SJose E. Roman 4339566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D,&darray)); 4349566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D)); 4359566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D)); 436d24d4204SJose E. Roman PetscFunctionReturn(0); 437d24d4204SJose E. Roman } 438d24d4204SJose E. Roman 439d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 440d24d4204SJose E. Roman { 441d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 442d24d4204SJose E. Roman const PetscScalar *d; 443d24d4204SJose E. Roman const PetscInt *ranges; 444d24d4204SJose E. Roman PetscScalar *d2d; 445d24d4204SJose E. Roman PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 446d24d4204SJose E. Roman 447d24d4204SJose E. Roman PetscFunctionBegin; 448d24d4204SJose E. Roman if (R) { 4499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d)); 450d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4519566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 4529566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 453d24d4204SJose E. Roman dlld = 1; 454d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 455d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 456d24d4204SJose E. Roman 457d24d4204SJose E. Roman /* allocate 2d vector */ 458d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 4599566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 460d24d4204SJose E. Roman 461d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 462d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 463d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 464d24d4204SJose E. Roman 465d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 466d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 467d24d4204SJose E. Roman 468d24d4204SJose E. Roman /* broadcast along process columns */ 469d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 470d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 471d24d4204SJose E. Roman 472d24d4204SJose E. Roman /* local scaling */ 473d24d4204SJose E. Roman for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 474d24d4204SJose E. Roman 4759566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 4769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d)); 477d24d4204SJose E. Roman } 478d24d4204SJose E. Roman if (L) { 4799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d)); 480d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4819566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 4829566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 483d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 484d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 485d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 486d24d4204SJose E. Roman 487d24d4204SJose E. Roman /* allocate 2d vector */ 488d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 4899566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd,&d2d)); 490d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 491d24d4204SJose E. Roman 492d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 493d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 494d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 495d24d4204SJose E. Roman 496d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 497d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 498d24d4204SJose E. Roman 499d24d4204SJose E. Roman /* broadcast along process rows */ 500d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 501d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 502d24d4204SJose E. Roman 503d24d4204SJose E. Roman /* local scaling */ 504d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 505d24d4204SJose E. Roman 5069566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d)); 5079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d)); 508d24d4204SJose E. Roman } 509d24d4204SJose E. Roman PetscFunctionReturn(0); 510d24d4204SJose E. Roman } 511d24d4204SJose E. Roman 512d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 513d24d4204SJose E. Roman { 514d24d4204SJose E. Roman PetscFunctionBegin; 515d24d4204SJose E. Roman *missing = PETSC_FALSE; 516d24d4204SJose E. Roman PetscFunctionReturn(0); 517d24d4204SJose E. Roman } 518d24d4204SJose E. Roman 519d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 520d24d4204SJose E. Roman { 521d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 522d24d4204SJose E. Roman PetscBLASInt n,one=1; 523d24d4204SJose E. Roman 524d24d4204SJose E. Roman PetscFunctionBegin; 525d24d4204SJose E. Roman n = x->lld*x->locc; 526d24d4204SJose E. Roman PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 527d24d4204SJose E. Roman PetscFunctionReturn(0); 528d24d4204SJose E. Roman } 529d24d4204SJose E. Roman 530d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 531d24d4204SJose E. Roman { 532d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 533d24d4204SJose E. Roman PetscBLASInt i,n; 534d24d4204SJose E. Roman PetscScalar v; 535d24d4204SJose E. Roman 536d24d4204SJose E. Roman PetscFunctionBegin; 537d24d4204SJose E. Roman n = PetscMin(x->M,x->N); 538d24d4204SJose E. Roman for (i=1;i<=n;i++) { 539d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 540d24d4204SJose E. Roman v += alpha; 541d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 542d24d4204SJose E. Roman } 543d24d4204SJose E. Roman PetscFunctionReturn(0); 544d24d4204SJose E. Roman } 545d24d4204SJose E. Roman 546d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 547d24d4204SJose E. Roman { 548d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 549d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 550d24d4204SJose E. Roman PetscBLASInt one=1; 551d24d4204SJose E. Roman PetscScalar beta=1.0; 552d24d4204SJose E. Roman 553d24d4204SJose E. Roman PetscFunctionBegin; 554d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y,1,X,3); 555d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 5569566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 557d24d4204SJose E. Roman PetscFunctionReturn(0); 558d24d4204SJose E. Roman } 559d24d4204SJose E. Roman 560d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 561d24d4204SJose E. Roman { 562d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 563d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 564d24d4204SJose E. Roman 565d24d4204SJose E. Roman PetscFunctionBegin; 5669566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 5679566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 568d24d4204SJose E. Roman PetscFunctionReturn(0); 569d24d4204SJose E. Roman } 570d24d4204SJose E. Roman 571d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 572d24d4204SJose E. Roman { 573d24d4204SJose E. Roman Mat Bs; 574d24d4204SJose E. Roman MPI_Comm comm; 575d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 576d24d4204SJose E. Roman 577d24d4204SJose E. Roman PetscFunctionBegin; 5789566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5799566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bs)); 5809566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 5819566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs,MATSCALAPACK)); 582d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 583d24d4204SJose E. Roman b->M = a->M; 584d24d4204SJose E. Roman b->N = a->N; 585d24d4204SJose E. Roman b->mb = a->mb; 586d24d4204SJose E. Roman b->nb = a->nb; 587d24d4204SJose E. Roman b->rsrc = a->rsrc; 588d24d4204SJose E. Roman b->csrc = a->csrc; 5899566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs)); 590d24d4204SJose E. Roman *B = Bs; 591d24d4204SJose E. Roman if (op == MAT_COPY_VALUES) { 5929566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 593d24d4204SJose E. Roman } 594d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 595d24d4204SJose E. Roman PetscFunctionReturn(0); 596d24d4204SJose E. Roman } 597d24d4204SJose E. Roman 598d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 599d24d4204SJose E. Roman { 600d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601d24d4204SJose E. Roman Mat Bs = *B; 602d24d4204SJose E. Roman PetscBLASInt one=1; 603d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 604d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 605d24d4204SJose E. Roman PetscInt i,j; 606d24d4204SJose E. Roman #endif 607d24d4204SJose E. Roman 608d24d4204SJose E. Roman PetscFunctionBegin; 609d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6109566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 611d24d4204SJose E. Roman *B = Bs; 612d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 613d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 614d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 615d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 616d24d4204SJose E. Roman /* undo conjugation */ 617d24d4204SJose E. Roman for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 618d24d4204SJose E. Roman #endif 619d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 620d24d4204SJose E. Roman PetscFunctionReturn(0); 621d24d4204SJose E. Roman } 622d24d4204SJose E. Roman 623d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 624d24d4204SJose E. Roman { 625d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 626d24d4204SJose E. Roman PetscInt i,j; 627d24d4204SJose E. Roman 628d24d4204SJose E. Roman PetscFunctionBegin; 629d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 630d24d4204SJose E. Roman PetscFunctionReturn(0); 631d24d4204SJose E. Roman } 632d24d4204SJose E. Roman 633d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 634d24d4204SJose E. Roman { 635d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 636d24d4204SJose E. Roman Mat Bs = *B; 637d24d4204SJose E. Roman PetscBLASInt one=1; 638d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 639d24d4204SJose E. Roman 640d24d4204SJose E. Roman PetscFunctionBegin; 641d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6429566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 643d24d4204SJose E. Roman *B = Bs; 644d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 645d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 646d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 647d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 648d24d4204SJose E. Roman PetscFunctionReturn(0); 649d24d4204SJose E. Roman } 650d24d4204SJose E. Roman 651d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 652d24d4204SJose E. Roman { 653d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 654d24d4204SJose E. Roman PetscScalar *x,*x2d; 655d24d4204SJose E. Roman const PetscInt *ranges; 656d24d4204SJose E. Roman PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 657d24d4204SJose E. Roman 658d24d4204SJose E. Roman PetscFunctionBegin; 6599566063dSJacob Faibussowitsch PetscCall(VecCopy(B,X)); 6609566063dSJacob Faibussowitsch PetscCall(VecGetArray(X,&x)); 661d24d4204SJose E. Roman 662d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 6639566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 6649566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 665d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 666d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 667d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 668d24d4204SJose E. Roman 669d24d4204SJose E. Roman /* allocate 2d vector */ 670d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 6719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx,&x2d)); 672d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 673d24d4204SJose E. Roman 674d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 675d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 676d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 677d24d4204SJose E. Roman 678d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 679d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 680d24d4204SJose E. Roman 681d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 682d24d4204SJose E. Roman switch (A->factortype) { 683d24d4204SJose E. Roman case MAT_FACTOR_LU: 684d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 685d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 686d24d4204SJose E. Roman break; 687d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 688d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 689d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 690d24d4204SJose E. Roman break; 691d24d4204SJose E. Roman default: 692d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 693d24d4204SJose E. Roman } 694d24d4204SJose E. Roman 695d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 696d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 697d24d4204SJose E. Roman 6989566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d)); 6999566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X,&x)); 700d24d4204SJose E. Roman PetscFunctionReturn(0); 701d24d4204SJose E. Roman } 702d24d4204SJose E. Roman 703d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 704d24d4204SJose E. Roman { 705d24d4204SJose E. Roman PetscFunctionBegin; 7069566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A,B,X)); 7079566063dSJacob Faibussowitsch PetscCall(VecAXPY(X,1,Y)); 708d24d4204SJose E. Roman PetscFunctionReturn(0); 709d24d4204SJose E. Roman } 710d24d4204SJose E. Roman 711d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 712d24d4204SJose E. Roman { 713d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 714d24d4204SJose E. Roman PetscBool flg1,flg2; 715d24d4204SJose E. Roman PetscBLASInt one=1,info; 716d24d4204SJose E. Roman 717d24d4204SJose E. Roman PetscFunctionBegin; 7189566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1)); 7199566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2)); 7202c71b3e2SJacob Faibussowitsch PetscCheckFalse(!(flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 721d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B,1,X,2); 722d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)B->data; 723d24d4204SJose E. Roman x = (Mat_ScaLAPACK*)X->data; 7249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc)); 725d24d4204SJose E. Roman 726d24d4204SJose E. Roman switch (A->factortype) { 727d24d4204SJose E. Roman case MAT_FACTOR_LU: 728d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 729d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 730d24d4204SJose E. Roman break; 731d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 732d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 733d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 734d24d4204SJose E. Roman break; 735d24d4204SJose E. Roman default: 736d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 737d24d4204SJose E. Roman } 738d24d4204SJose E. Roman PetscFunctionReturn(0); 739d24d4204SJose E. Roman } 740d24d4204SJose E. Roman 741d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 742d24d4204SJose E. Roman { 743d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 744d24d4204SJose E. Roman PetscBLASInt one=1,info; 745d24d4204SJose E. Roman 746d24d4204SJose E. Roman PetscFunctionBegin; 747d24d4204SJose E. Roman if (!a->pivots) { 7489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots)); 7499566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt))); 750d24d4204SJose E. Roman } 751d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 752d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf",info); 753d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 754d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 755d24d4204SJose E. Roman 7569566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7579566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 758d24d4204SJose E. Roman PetscFunctionReturn(0); 759d24d4204SJose E. Roman } 760d24d4204SJose E. Roman 761d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 762d24d4204SJose E. Roman { 763d24d4204SJose E. Roman PetscFunctionBegin; 7649566063dSJacob Faibussowitsch PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); 7659566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info)); 766d24d4204SJose E. Roman PetscFunctionReturn(0); 767d24d4204SJose E. Roman } 768d24d4204SJose E. Roman 769d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 770d24d4204SJose E. Roman { 771d24d4204SJose E. Roman PetscFunctionBegin; 772d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 773d24d4204SJose E. Roman PetscFunctionReturn(0); 774d24d4204SJose E. Roman } 775d24d4204SJose E. Roman 776d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 777d24d4204SJose E. Roman { 778d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 779d24d4204SJose E. Roman PetscBLASInt one=1,info; 780d24d4204SJose E. Roman 781d24d4204SJose E. Roman PetscFunctionBegin; 782d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 783d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf",info); 784d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 785d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 786d24d4204SJose E. Roman 7879566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype)); 7889566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 789d24d4204SJose E. Roman PetscFunctionReturn(0); 790d24d4204SJose E. Roman } 791d24d4204SJose E. Roman 792d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 793d24d4204SJose E. Roman { 794d24d4204SJose E. Roman PetscFunctionBegin; 7959566063dSJacob Faibussowitsch PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); 7969566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info)); 797d24d4204SJose E. Roman PetscFunctionReturn(0); 798d24d4204SJose E. Roman } 799d24d4204SJose E. Roman 800d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 801d24d4204SJose E. Roman { 802d24d4204SJose E. Roman PetscFunctionBegin; 803d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 804d24d4204SJose E. Roman PetscFunctionReturn(0); 805d24d4204SJose E. Roman } 806d24d4204SJose E. Roman 807d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 808d24d4204SJose E. Roman { 809d24d4204SJose E. Roman PetscFunctionBegin; 810d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 811d24d4204SJose E. Roman PetscFunctionReturn(0); 812d24d4204SJose E. Roman } 813d24d4204SJose E. Roman 814d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 815d24d4204SJose E. Roman { 816d24d4204SJose E. Roman Mat B; 81759172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 818d24d4204SJose E. Roman 819d24d4204SJose E. Roman PetscFunctionBegin; 820d24d4204SJose E. Roman /* Create the factorization matrix */ 8219566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B)); 82266e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 823d24d4204SJose E. Roman B->factortype = ftype; 8249566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 8259566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype)); 826d24d4204SJose E. Roman 8279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack)); 828d24d4204SJose E. Roman *F = B; 829d24d4204SJose E. Roman PetscFunctionReturn(0); 830d24d4204SJose E. Roman } 831d24d4204SJose E. Roman 832d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 833d24d4204SJose E. Roman { 834d24d4204SJose E. Roman PetscFunctionBegin; 8359566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack)); 8369566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack)); 837d24d4204SJose E. Roman PetscFunctionReturn(0); 838d24d4204SJose E. Roman } 839d24d4204SJose E. Roman 840d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 841d24d4204SJose E. Roman { 842d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 843d24d4204SJose E. Roman PetscBLASInt one=1,lwork=0; 844d24d4204SJose E. Roman const char *ntype; 845d68f4f38SPierre Jolivet PetscScalar *work=NULL,dummy; 846d24d4204SJose E. Roman 847d24d4204SJose E. Roman PetscFunctionBegin; 848d24d4204SJose E. Roman switch (type) { 849d24d4204SJose E. Roman case NORM_1: 850d24d4204SJose E. Roman ntype = "1"; 851d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 852d24d4204SJose E. Roman break; 853d24d4204SJose E. Roman case NORM_FROBENIUS: 854d24d4204SJose E. Roman ntype = "F"; 855d24d4204SJose E. Roman work = &dummy; 856d24d4204SJose E. Roman break; 857d24d4204SJose E. Roman case NORM_INFINITY: 858d24d4204SJose E. Roman ntype = "I"; 859d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 860d24d4204SJose E. Roman break; 861d24d4204SJose E. Roman default: 862d24d4204SJose E. Roman SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 863d24d4204SJose E. Roman } 8649566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork,&work)); 865d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 8669566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work)); 867d24d4204SJose E. Roman PetscFunctionReturn(0); 868d24d4204SJose E. Roman } 869d24d4204SJose E. Roman 870d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 871d24d4204SJose E. Roman { 872d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 873d24d4204SJose E. Roman 874d24d4204SJose E. Roman PetscFunctionBegin; 8759566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc,a->lld*a->locc)); 876d24d4204SJose E. Roman PetscFunctionReturn(0); 877d24d4204SJose E. Roman } 878d24d4204SJose E. Roman 879d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 880d24d4204SJose E. Roman { 881d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 882d24d4204SJose E. Roman PetscInt i,n,nb,isrc,nproc,iproc,*idx; 883d24d4204SJose E. Roman 884d24d4204SJose E. Roman PetscFunctionBegin; 885d24d4204SJose E. Roman if (rows) { 886d24d4204SJose E. Roman n = a->locr; 887d24d4204SJose E. Roman nb = a->mb; 888d24d4204SJose E. Roman isrc = a->rsrc; 889d24d4204SJose E. Roman nproc = a->grid->nprow; 890d24d4204SJose E. Roman iproc = a->grid->myrow; 8919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&idx)); 892d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 8939566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows)); 894d24d4204SJose E. Roman } 895d24d4204SJose E. Roman if (cols) { 896d24d4204SJose E. Roman n = a->locc; 897d24d4204SJose E. Roman nb = a->nb; 898d24d4204SJose E. Roman isrc = a->csrc; 899d24d4204SJose E. Roman nproc = a->grid->npcol; 900d24d4204SJose E. Roman iproc = a->grid->mycol; 9019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&idx)); 902d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 9039566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols)); 904d24d4204SJose E. Roman } 905d24d4204SJose E. Roman PetscFunctionReturn(0); 906d24d4204SJose E. Roman } 907d24d4204SJose E. Roman 908d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 909d24d4204SJose E. Roman { 910d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 911d24d4204SJose E. Roman Mat Bmpi; 912d24d4204SJose E. Roman MPI_Comm comm; 9134b1a79daSJose E. Roman PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 9144b1a79daSJose E. Roman const PetscInt *ranges,*branges,*cwork; 9154b1a79daSJose E. Roman const PetscScalar *vwork; 916d24d4204SJose E. Roman PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 917d24d4204SJose E. Roman PetscScalar *barray; 9184b1a79daSJose E. Roman PetscBool differ=PETSC_FALSE; 9194b1a79daSJose E. Roman PetscMPIInt size; 920d24d4204SJose E. Roman 921d24d4204SJose E. Roman PetscFunctionBegin; 9229566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 9239566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 9244b1a79daSJose E. Roman 9254b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 9279566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges)); 9284b1a79daSJose E. Roman for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 9294b1a79daSJose E. Roman } 9304b1a79daSJose E. Roman 9314b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9329566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bmpi)); 9334b1a79daSJose E. Roman m = PETSC_DECIDE; 9349566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 9354b1a79daSJose E. Roman n = PETSC_DECIDE; 9369566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 9379566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 9389566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi,MATDENSE)); 9399566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 9404b1a79daSJose E. Roman 9414b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9429566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 9434b1a79daSJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 9444b1a79daSJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 9454b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit",info); 9464b1a79daSJose E. Roman 9474b1a79daSJose E. Roman /* redistribute matrix */ 9489566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi,&barray)); 9494b1a79daSJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 9509566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi,&barray)); 9519566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 9529566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 9534b1a79daSJose E. Roman 9544b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9559566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend)); 9564b1a79daSJose E. Roman for (i=rstart;i<rend;i++) { 9579566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork)); 9589566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES)); 9599566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork)); 9604b1a79daSJose E. Roman } 9619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY)); 9629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY)); 9639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi)); 9644b1a79daSJose E. Roman 9654b1a79daSJose E. Roman } else { /* normal cases */ 966d24d4204SJose E. Roman 967d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 968d24d4204SJose E. Roman else { 9699566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bmpi)); 97092c846b4SJose E. Roman m = PETSC_DECIDE; 9719566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 97292c846b4SJose E. Roman n = PETSC_DECIDE; 9739566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 9749566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 9759566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi,MATDENSE)); 9769566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 977d24d4204SJose E. Roman } 978d24d4204SJose E. Roman 979d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9809566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 981d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 982d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 983d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 984d24d4204SJose E. Roman 985d24d4204SJose E. Roman /* redistribute matrix */ 9869566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi,&barray)); 987d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 9889566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi,&barray)); 989d24d4204SJose E. Roman 9909566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 9919566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 992d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 9939566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&Bmpi)); 994d24d4204SJose E. Roman } else *B = Bmpi; 9954b1a79daSJose E. Roman } 996d24d4204SJose E. Roman PetscFunctionReturn(0); 997d24d4204SJose E. Roman } 998d24d4204SJose E. Roman 999d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1000d24d4204SJose E. Roman { 1001d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1002d24d4204SJose E. Roman Mat Bmpi; 1003d24d4204SJose E. Roman MPI_Comm comm; 100492c846b4SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1005d24d4204SJose E. Roman const PetscInt *ranges; 1006d24d4204SJose E. Roman PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1007d24d4204SJose E. Roman PetscScalar *aarray; 10084e8b52a1SJose E. Roman PetscInt lda; 1009d24d4204SJose E. Roman 1010d24d4204SJose E. Roman PetscFunctionBegin; 10119566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1012d24d4204SJose E. Roman 1013d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1014d24d4204SJose E. Roman else { 10159566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Bmpi)); 101692c846b4SJose E. Roman m = PETSC_DECIDE; 10179566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 101892c846b4SJose E. Roman n = PETSC_DECIDE; 10199566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 10209566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 10219566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi,MATSCALAPACK)); 10229566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi)); 1023d24d4204SJose E. Roman } 1024d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bmpi->data; 1025d24d4204SJose E. Roman 1026d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 10279566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 10289566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1],&amb)); /* row block size */ 10299566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(A,&lda)); 10304e8b52a1SJose E. Roman lld = PetscMax(lda,1); /* local leading dimension */ 1031d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1032d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1033d24d4204SJose E. Roman 1034d24d4204SJose E. Roman /* redistribute matrix */ 10359566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(A,&aarray)); 1036d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 10379566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A,&aarray)); 1038d24d4204SJose E. Roman 10399566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 10409566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1041d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10429566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A,&Bmpi)); 1043d24d4204SJose E. Roman } else *B = Bmpi; 1044d24d4204SJose E. Roman PetscFunctionReturn(0); 1045d24d4204SJose E. Roman } 1046d24d4204SJose E. Roman 1047d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1048d24d4204SJose E. Roman { 1049d24d4204SJose E. Roman Mat mat_scal; 1050d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1051d24d4204SJose E. Roman const PetscInt *cols; 1052d24d4204SJose E. Roman const PetscScalar *vals; 1053d24d4204SJose E. Roman 1054d24d4204SJose E. Roman PetscFunctionBegin; 1055d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1056d24d4204SJose E. Roman mat_scal = *newmat; 10579566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1058d24d4204SJose E. Roman } else { 10599566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1060d24d4204SJose E. Roman m = PETSC_DECIDE; 10619566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1062d24d4204SJose E. Roman n = PETSC_DECIDE; 10639566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 10649566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal,m,n,M,N)); 10659566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal,MATSCALAPACK)); 10669566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1067d24d4204SJose E. Roman } 1068d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 10699566063dSJacob Faibussowitsch PetscCall(MatGetRow(A,row,&ncols,&cols,&vals)); 10709566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES)); 10719566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1072d24d4204SJose E. Roman } 10739566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 10749566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1075d24d4204SJose E. Roman 10769566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); 1077d24d4204SJose E. Roman else *newmat = mat_scal; 1078d24d4204SJose E. Roman PetscFunctionReturn(0); 1079d24d4204SJose E. Roman } 1080d24d4204SJose E. Roman 1081d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1082d24d4204SJose E. Roman { 1083d24d4204SJose E. Roman Mat mat_scal; 1084d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1085d24d4204SJose E. Roman const PetscInt *cols; 1086d24d4204SJose E. Roman const PetscScalar *vals; 1087d24d4204SJose E. Roman PetscScalar v; 1088d24d4204SJose E. Roman 1089d24d4204SJose E. Roman PetscFunctionBegin; 1090d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1091d24d4204SJose E. Roman mat_scal = *newmat; 10929566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal)); 1093d24d4204SJose E. Roman } else { 10949566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1095d24d4204SJose E. Roman m = PETSC_DECIDE; 10969566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1097d24d4204SJose E. Roman n = PETSC_DECIDE; 10989566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 10999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal,m,n,M,N)); 11009566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal,MATSCALAPACK)); 11019566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal)); 1102d24d4204SJose E. Roman } 11039566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1104d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 11059566063dSJacob Faibussowitsch PetscCall(MatGetRow(A,row,&ncols,&cols,&vals)); 11069566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES)); 1107d24d4204SJose E. Roman for (j=0;j<ncols;j++) { /* lower triangular part */ 1108d24d4204SJose E. Roman if (cols[j] == row) continue; 1109d24d4204SJose E. Roman v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 11109566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES)); 1111d24d4204SJose E. Roman } 11129566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1113d24d4204SJose E. Roman } 11149566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 11159566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 11169566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1117d24d4204SJose E. Roman 11189566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); 1119d24d4204SJose E. Roman else *newmat = mat_scal; 1120d24d4204SJose E. Roman PetscFunctionReturn(0); 1121d24d4204SJose E. Roman } 1122d24d4204SJose E. Roman 1123d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1124d24d4204SJose E. Roman { 1125d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1126d24d4204SJose E. Roman PetscInt sz=0; 1127d24d4204SJose E. Roman 1128d24d4204SJose E. Roman PetscFunctionBegin; 11299566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11309566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1131d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1132d24d4204SJose E. Roman 11339566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11349566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld,a->locc,&sz)); 11359566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz,&a->loc)); 11369566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar))); 1137d24d4204SJose E. Roman 1138d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1139d24d4204SJose E. Roman PetscFunctionReturn(0); 1140d24d4204SJose E. Roman } 1141d24d4204SJose E. Roman 1142d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1143d24d4204SJose E. Roman { 1144d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1145d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1146d24d4204SJose E. Roman PetscBool flg; 1147d24d4204SJose E. Roman MPI_Comm icomm; 1148d24d4204SJose E. Roman 1149d24d4204SJose E. Roman PetscFunctionBegin; 11509566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash)); 11519566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc)); 11529566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots)); 11539566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 11549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1155d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1156d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1157d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1158d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 11599566063dSJacob Faibussowitsch PetscCall(PetscFree(grid)); 11609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval)); 1161d24d4204SJose E. Roman } 11629566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 11639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL)); 11649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 11659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL)); 11669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL)); 11679566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1168d24d4204SJose E. Roman PetscFunctionReturn(0); 1169d24d4204SJose E. Roman } 1170d24d4204SJose E. Roman 11719fbee547SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1172d24d4204SJose E. Roman { 1173d24d4204SJose E. Roman const PetscInt *ranges; 1174d24d4204SJose E. Roman PetscMPIInt size; 1175d24d4204SJose E. Roman PetscInt i,n; 1176d24d4204SJose E. Roman 1177d24d4204SJose E. Roman PetscFunctionBegin; 11789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(map->comm,&size)); 1179d24d4204SJose E. Roman if (size>2) { 11809566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(map,&ranges)); 1181d24d4204SJose E. Roman n = ranges[1]-ranges[0]; 1182d24d4204SJose E. Roman for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 11832c71b3e2SJacob Faibussowitsch PetscCheckFalse(i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1184d24d4204SJose E. Roman } 1185d24d4204SJose E. Roman PetscFunctionReturn(0); 1186d24d4204SJose E. Roman } 1187d24d4204SJose E. Roman 1188d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1189d24d4204SJose E. Roman { 1190d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1191d24d4204SJose E. Roman PetscBLASInt info=0; 1192d24d4204SJose E. Roman 1193d24d4204SJose E. Roman PetscFunctionBegin; 11949566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 11959566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1196d24d4204SJose E. Roman 1197d24d4204SJose E. Roman /* check that the layout is as enforced by MatCreateScaLAPACK */ 11989566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKCheckLayout(A->rmap)); 11999566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKCheckLayout(A->cmap)); 1200d24d4204SJose E. Roman 1201d24d4204SJose E. Roman /* compute local sizes */ 12029566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N,&a->M)); 12039566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N,&a->N)); 1204d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1205d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1206d24d4204SJose E. Roman a->lld = PetscMax(1,a->locr); 1207d24d4204SJose E. Roman 1208d24d4204SJose E. Roman /* allocate local array */ 12099566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A)); 1210d24d4204SJose E. Roman 1211d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1212d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1213d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1214d24d4204SJose E. Roman PetscFunctionReturn(0); 1215d24d4204SJose E. Roman } 1216d24d4204SJose E. Roman 1217d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1218d24d4204SJose E. Roman { 1219d24d4204SJose E. Roman PetscInt nstash,reallocs; 1220d24d4204SJose E. Roman 1221d24d4204SJose E. Roman PetscFunctionBegin; 1222d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 12239566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL)); 12249566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs)); 12259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 1226d24d4204SJose E. Roman PetscFunctionReturn(0); 1227d24d4204SJose E. Roman } 1228d24d4204SJose E. Roman 1229d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1230d24d4204SJose E. Roman { 1231d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1232d24d4204SJose E. Roman PetscMPIInt n; 1233d24d4204SJose E. Roman PetscInt i,flg,*row,*col; 1234d24d4204SJose E. Roman PetscScalar *val; 1235d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1236d24d4204SJose E. Roman 1237d24d4204SJose E. Roman PetscFunctionBegin; 1238d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1239d24d4204SJose E. Roman while (1) { 12409566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg)); 1241d24d4204SJose E. Roman if (!flg) break; 1242d24d4204SJose E. Roman for (i=0;i<n;i++) { 12439566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i]+1,&gridx)); 12449566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i]+1,&gcidx)); 1245d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 12462c71b3e2SJacob Faibussowitsch PetscCheckFalse(rsrc!=a->grid->myrow || csrc!=a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1247d24d4204SJose E. Roman switch (A->insertmode) { 1248d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1249d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 125098921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1251d24d4204SJose E. Roman } 1252d24d4204SJose E. Roman } 1253d24d4204SJose E. Roman } 12549566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash)); 1255d24d4204SJose E. Roman PetscFunctionReturn(0); 1256d24d4204SJose E. Roman } 1257d24d4204SJose E. Roman 1258d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1259d24d4204SJose E. Roman { 1260d24d4204SJose E. Roman Mat Adense,As; 1261d24d4204SJose E. Roman MPI_Comm comm; 1262d24d4204SJose E. Roman 1263d24d4204SJose E. Roman PetscFunctionBegin; 12649566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm)); 12659566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,&Adense)); 12669566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense,MATDENSE)); 12679566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense,viewer)); 12689566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As)); 12699566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense)); 12709566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat,&As)); 1271d24d4204SJose E. Roman PetscFunctionReturn(0); 1272d24d4204SJose E. Roman } 1273d24d4204SJose E. Roman 1274d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 1275d24d4204SJose E. Roman static struct _MatOps MatOps_Values = { 1276d24d4204SJose E. Roman MatSetValues_ScaLAPACK, 1277d24d4204SJose E. Roman 0, 1278d24d4204SJose E. Roman 0, 1279d24d4204SJose E. Roman MatMult_ScaLAPACK, 1280d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1281d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1282d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1283d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1284d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1285d24d4204SJose E. Roman 0, 1286d24d4204SJose E. Roman /*10*/ 0, 1287d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1288d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1289d24d4204SJose E. Roman 0, 1290d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1291d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1292d24d4204SJose E. Roman 0, 1293d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1294d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1295d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1296d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1297d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1298d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1299d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1300d24d4204SJose E. Roman /*24*/ 0, 1301d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1302d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1303d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1304d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1305d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1306d24d4204SJose E. Roman 0, 1307d24d4204SJose E. Roman 0, 1308d24d4204SJose E. Roman 0, 1309d24d4204SJose E. Roman 0, 1310d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1311d24d4204SJose E. Roman 0, 1312d24d4204SJose E. Roman 0, 1313d24d4204SJose E. Roman 0, 1314d24d4204SJose E. Roman 0, 1315d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1316d24d4204SJose E. Roman 0, 1317d24d4204SJose E. Roman 0, 1318d24d4204SJose E. Roman 0, 1319d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1320d24d4204SJose E. Roman /*44*/ 0, 1321d24d4204SJose E. Roman MatScale_ScaLAPACK, 1322d24d4204SJose E. Roman MatShift_ScaLAPACK, 1323d24d4204SJose E. Roman 0, 1324d24d4204SJose E. Roman 0, 1325d24d4204SJose E. Roman /*49*/ 0, 1326d24d4204SJose E. Roman 0, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman 0, 1329d24d4204SJose E. Roman 0, 1330d24d4204SJose E. Roman /*54*/ 0, 1331d24d4204SJose E. Roman 0, 1332d24d4204SJose E. Roman 0, 1333d24d4204SJose E. Roman 0, 1334d24d4204SJose E. Roman 0, 1335d24d4204SJose E. Roman /*59*/ 0, 1336d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1337d24d4204SJose E. Roman MatView_ScaLAPACK, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman 0, 1340d24d4204SJose E. Roman /*64*/ 0, 1341d24d4204SJose E. Roman 0, 1342d24d4204SJose E. Roman 0, 1343d24d4204SJose E. Roman 0, 1344d24d4204SJose E. Roman 0, 1345d24d4204SJose E. Roman /*69*/ 0, 1346d24d4204SJose E. Roman 0, 1347d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1348d24d4204SJose E. Roman 0, 1349d24d4204SJose E. Roman 0, 1350d24d4204SJose E. Roman /*74*/ 0, 1351d24d4204SJose E. Roman 0, 1352d24d4204SJose E. Roman 0, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman 0, 1355d24d4204SJose E. Roman /*79*/ 0, 1356d24d4204SJose E. Roman 0, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1360d24d4204SJose E. Roman /*84*/ 0, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman 0, 1365d24d4204SJose E. Roman /*89*/ 0, 1366d24d4204SJose E. Roman 0, 1367d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1368d24d4204SJose E. Roman 0, 1369d24d4204SJose E. Roman 0, 1370d24d4204SJose E. Roman /*94*/ 0, 1371d24d4204SJose E. Roman 0, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1374d24d4204SJose E. Roman 0, 1375d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1376d24d4204SJose E. Roman 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1379d24d4204SJose E. Roman 0, 1380d24d4204SJose E. Roman /*104*/0, 1381d24d4204SJose E. Roman 0, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman 0, 1385d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK, 1386d24d4204SJose E. Roman 0, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1390d24d4204SJose E. Roman /*114*/0, 1391d24d4204SJose E. Roman 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman 0, 1395d24d4204SJose E. Roman /*119*/0, 1396d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1397d24d4204SJose E. Roman 0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman 0, 1400d24d4204SJose E. Roman /*124*/0, 1401d24d4204SJose E. Roman 0, 1402d24d4204SJose E. Roman 0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman 0, 1405d24d4204SJose E. Roman /*129*/0, 1406d24d4204SJose E. Roman 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman 0, 1409d24d4204SJose E. Roman 0, 1410d24d4204SJose E. Roman /*134*/0, 1411d24d4204SJose E. Roman 0, 1412d24d4204SJose E. Roman 0, 1413d24d4204SJose E. Roman 0, 1414d24d4204SJose E. Roman 0, 1415d24d4204SJose E. Roman 0, 1416d24d4204SJose E. Roman /*140*/0, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman 0, 1420d24d4204SJose E. Roman 0, 1421d24d4204SJose E. Roman /*145*/0, 1422d24d4204SJose E. Roman 0, 1423d24d4204SJose E. Roman 0 1424d24d4204SJose E. Roman }; 1425d24d4204SJose E. Roman 1426d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1427d24d4204SJose E. Roman { 1428d24d4204SJose E. Roman PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1429d24d4204SJose E. Roman PetscInt size=stash->size,nsends; 1430d24d4204SJose E. Roman PetscInt count,*sindices,**rindices,i,j,l; 1431d24d4204SJose E. Roman PetscScalar **rvalues,*svalues; 1432d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1433d24d4204SJose E. Roman MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1434d24d4204SJose E. Roman PetscMPIInt *sizes,*nlengths,nreceives; 1435d24d4204SJose E. Roman PetscInt *sp_idx,*sp_idy; 1436d24d4204SJose E. Roman PetscScalar *sp_val; 1437d24d4204SJose E. Roman PetscMatStashSpace space,space_next; 1438d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1439d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1440d24d4204SJose E. Roman 1441d24d4204SJose E. Roman PetscFunctionBegin; 1442d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1443d24d4204SJose E. Roman InsertMode addv; 14449566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 14452c71b3e2SJacob Faibussowitsch PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1446d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1447d24d4204SJose E. Roman } 1448d24d4204SJose E. Roman 1449d24d4204SJose E. Roman bs2 = stash->bs*stash->bs; 1450d24d4204SJose E. Roman 1451d24d4204SJose E. Roman /* first count number of contributors to each processor */ 14529566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size,&nlengths)); 14539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n+1,&owner)); 1454d24d4204SJose E. Roman 1455d24d4204SJose E. Roman i = j = 0; 1456d24d4204SJose E. Roman space = stash->space_head; 1457d24d4204SJose E. Roman while (space) { 1458d24d4204SJose E. Roman space_next = space->next; 1459d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 14609566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx)); 14619566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx)); 1462d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1463d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1464d24d4204SJose E. Roman nlengths[j]++; owner[i] = j; 1465d24d4204SJose E. Roman i++; 1466d24d4204SJose E. Roman } 1467d24d4204SJose E. Roman space = space_next; 1468d24d4204SJose E. Roman } 1469d24d4204SJose E. Roman 1470d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 14719566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size,&sizes)); 1472d24d4204SJose E. Roman for (i=0, nsends=0; i<size; i++) { 1473d24d4204SJose E. Roman if (nlengths[i]) { 1474d24d4204SJose E. Roman sizes[i] = 1; nsends++; 1475d24d4204SJose E. Roman } 1476d24d4204SJose E. Roman } 1477d24d4204SJose E. Roman 1478d24d4204SJose E. Roman {PetscMPIInt *onodes,*olengths; 1479d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 14809566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives)); 14819566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths)); 1482d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1483d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] *=2; 14849566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1)); 1485d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1486d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 14879566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2)); 14889566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 14899566063dSJacob Faibussowitsch PetscCall(PetscFree(olengths));} 1490d24d4204SJose E. Roman 1491d24d4204SJose E. Roman /* do sends: 1492d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1493d24d4204SJose E. Roman the ith processor 1494d24d4204SJose E. Roman */ 14959566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices)); 14969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nsends,&send_waits)); 14979566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size,&startv,size,&starti)); 1498d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 1499d24d4204SJose E. Roman startv[0] = 0; starti[0] = 0; 1500d24d4204SJose E. Roman for (i=1; i<size; i++) { 1501d24d4204SJose E. Roman startv[i] = startv[i-1] + nlengths[i-1]; 1502d24d4204SJose E. Roman starti[i] = starti[i-1] + 2*nlengths[i-1]; 1503d24d4204SJose E. Roman } 1504d24d4204SJose E. Roman 1505d24d4204SJose E. Roman i = 0; 1506d24d4204SJose E. Roman space = stash->space_head; 1507d24d4204SJose E. Roman while (space) { 1508d24d4204SJose E. Roman space_next = space->next; 1509d24d4204SJose E. Roman sp_idx = space->idx; 1510d24d4204SJose E. Roman sp_idy = space->idy; 1511d24d4204SJose E. Roman sp_val = space->val; 1512d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1513d24d4204SJose E. Roman j = owner[i]; 1514d24d4204SJose E. Roman if (bs2 == 1) { 1515d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1516d24d4204SJose E. Roman } else { 1517d24d4204SJose E. Roman PetscInt k; 1518d24d4204SJose E. Roman PetscScalar *buf1,*buf2; 1519d24d4204SJose E. Roman buf1 = svalues+bs2*startv[j]; 1520d24d4204SJose E. Roman buf2 = space->val + bs2*l; 1521d24d4204SJose E. Roman for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1522d24d4204SJose E. Roman } 1523d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1524d24d4204SJose E. Roman sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1525d24d4204SJose E. Roman startv[j]++; 1526d24d4204SJose E. Roman starti[j]++; 1527d24d4204SJose E. Roman i++; 1528d24d4204SJose E. Roman } 1529d24d4204SJose E. Roman space = space_next; 1530d24d4204SJose E. Roman } 1531d24d4204SJose E. Roman startv[0] = 0; 1532d24d4204SJose E. Roman for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1533d24d4204SJose E. Roman 1534d24d4204SJose E. Roman for (i=0,count=0; i<size; i++) { 1535d24d4204SJose E. Roman if (sizes[i]) { 15369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++)); 15379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++)); 1538d24d4204SJose E. Roman } 1539d24d4204SJose E. Roman } 1540d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 15419566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends)); 1542d24d4204SJose E. Roman for (i=0; i<size; i++) { 1543d24d4204SJose E. Roman if (sizes[i]) { 15449566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))))); 1545d24d4204SJose E. Roman } 1546d24d4204SJose E. Roman } 1547d24d4204SJose E. Roman #endif 15489566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 15499566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 15509566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv,starti)); 15519566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 1552d24d4204SJose E. Roman 1553d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 15549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nreceives,&recv_waits)); 1555d24d4204SJose E. Roman 1556d24d4204SJose E. Roman for (i=0; i<nreceives; i++) { 1557d24d4204SJose E. Roman recv_waits[2*i] = recv_waits1[i]; 1558d24d4204SJose E. Roman recv_waits[2*i+1] = recv_waits2[i]; 1559d24d4204SJose E. Roman } 1560d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1561d24d4204SJose E. Roman 15629566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 15639566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 1564d24d4204SJose E. Roman 1565d24d4204SJose E. Roman stash->svalues = svalues; 1566d24d4204SJose E. Roman stash->sindices = sindices; 1567d24d4204SJose E. Roman stash->rvalues = rvalues; 1568d24d4204SJose E. Roman stash->rindices = rindices; 1569d24d4204SJose E. Roman stash->send_waits = send_waits; 1570d24d4204SJose E. Roman stash->nsends = nsends; 1571d24d4204SJose E. Roman stash->nrecvs = nreceives; 1572d24d4204SJose E. Roman stash->reproduce_count = 0; 1573d24d4204SJose E. Roman PetscFunctionReturn(0); 1574d24d4204SJose E. Roman } 1575d24d4204SJose E. Roman 1576d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1577d24d4204SJose E. Roman { 1578d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1579d24d4204SJose E. Roman 1580d24d4204SJose E. Roman PetscFunctionBegin; 158128b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 15822c71b3e2SJacob Faibussowitsch PetscCheckFalse(mb<1 && mb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb); 15832c71b3e2SJacob Faibussowitsch PetscCheckFalse(nb<1 && nb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb); 15849566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 15859566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1586d24d4204SJose E. Roman PetscFunctionReturn(0); 1587d24d4204SJose E. Roman } 1588d24d4204SJose E. Roman 1589d24d4204SJose E. Roman /*@ 1590d24d4204SJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1591d24d4204SJose E. Roman the ScaLAPACK matrix 1592d24d4204SJose E. Roman 1593d24d4204SJose E. Roman Logically Collective on A 1594d24d4204SJose E. Roman 1595d8d19677SJose E. Roman Input Parameters: 1596d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1597d24d4204SJose E. Roman . mb - the row block size 1598d24d4204SJose E. Roman - nb - the column block size 1599d24d4204SJose E. Roman 1600d24d4204SJose E. Roman Level: intermediate 1601d24d4204SJose E. Roman 1602d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1603d24d4204SJose E. Roman @*/ 1604d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1605d24d4204SJose E. Roman { 1606d24d4204SJose E. Roman PetscFunctionBegin; 1607d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1608d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,mb,2); 1609d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,nb,3); 1610*cac4c232SBarry Smith PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb)); 1611d24d4204SJose E. Roman PetscFunctionReturn(0); 1612d24d4204SJose E. Roman } 1613d24d4204SJose E. Roman 1614d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1615d24d4204SJose E. Roman { 1616d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1617d24d4204SJose E. Roman 1618d24d4204SJose E. Roman PetscFunctionBegin; 1619d24d4204SJose E. Roman if (mb) *mb = a->mb; 1620d24d4204SJose E. Roman if (nb) *nb = a->nb; 1621d24d4204SJose E. Roman PetscFunctionReturn(0); 1622d24d4204SJose E. Roman } 1623d24d4204SJose E. Roman 1624d24d4204SJose E. Roman /*@ 1625d24d4204SJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1626d24d4204SJose E. Roman the ScaLAPACK matrix 1627d24d4204SJose E. Roman 1628d24d4204SJose E. Roman Not collective 1629d24d4204SJose E. Roman 1630d24d4204SJose E. Roman Input Parameter: 1631d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1632d24d4204SJose E. Roman 1633d24d4204SJose E. Roman Output Parameters: 1634d24d4204SJose E. Roman + mb - the row block size 1635d24d4204SJose E. Roman - nb - the column block size 1636d24d4204SJose E. Roman 1637d24d4204SJose E. Roman Level: intermediate 1638d24d4204SJose E. Roman 1639d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1640d24d4204SJose E. Roman @*/ 1641d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1642d24d4204SJose E. Roman { 1643d24d4204SJose E. Roman PetscFunctionBegin; 1644d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1645*cac4c232SBarry Smith PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb)); 1646d24d4204SJose E. Roman PetscFunctionReturn(0); 1647d24d4204SJose E. Roman } 1648d24d4204SJose E. Roman 1649d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1650d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1651d24d4204SJose E. Roman 1652d24d4204SJose E. Roman /*MC 1653d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1654d24d4204SJose E. Roman 1655d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1656d24d4204SJose E. Roman 1657d24d4204SJose E. Roman Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1658d24d4204SJose E. Roman 1659d24d4204SJose E. Roman Options Database Keys: 1660d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1661d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1662d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1663d24d4204SJose E. Roman 1664d24d4204SJose E. Roman Level: beginner 1665d24d4204SJose E. Roman 1666d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL 1667d24d4204SJose E. Roman M*/ 1668d24d4204SJose E. Roman 1669d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1670d24d4204SJose E. Roman { 1671d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1672d24d4204SJose E. Roman PetscErrorCode ierr; 1673d24d4204SJose E. Roman PetscBool flg,flg1; 1674d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1675d24d4204SJose E. Roman MPI_Comm icomm; 1676d24d4204SJose E. Roman PetscBLASInt nprow,npcol,myrow,mycol; 1677d24d4204SJose E. Roman PetscInt optv1,k=2,array[2]={0,0}; 1678d24d4204SJose E. Roman PetscMPIInt size; 1679d24d4204SJose E. Roman 1680d24d4204SJose E. Roman PetscFunctionBegin; 16819566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps))); 1682d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1683d24d4204SJose E. Roman 16849566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash)); 1685d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1686d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1687d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1688d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1689d24d4204SJose E. Roman 16909566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&a)); 1691d24d4204SJose E. Roman A->data = (void*)a; 1692d24d4204SJose E. Roman 1693d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1694d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 16959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0)); 16969566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 16979566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite)); 1698d24d4204SJose E. Roman } 16999566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 17009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1701d24d4204SJose E. Roman if (!flg) { 17029566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&grid)); 1703d24d4204SJose E. Roman 17049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm,&size)); 1705d24d4204SJose E. Roman grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1706d24d4204SJose E. Roman 17079566063dSJacob Faibussowitsch ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");PetscCall(ierr); 17089566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1)); 1709d24d4204SJose E. Roman if (flg1) { 17102c71b3e2SJacob Faibussowitsch PetscCheckFalse(size % optv1,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size); 1711d24d4204SJose E. Roman grid->nprow = optv1; 1712d24d4204SJose E. Roman } 17139566063dSJacob Faibussowitsch ierr = PetscOptionsEnd();PetscCall(ierr); 1714d24d4204SJose E. Roman 1715d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1716d24d4204SJose E. Roman grid->npcol = size/grid->nprow; 17179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow,&nprow)); 17189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol,&npcol)); 1719f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1720d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1721d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1722d24d4204SJose E. Roman grid->grid_refct = 1; 1723d24d4204SJose E. Roman grid->nprow = nprow; 1724d24d4204SJose E. Roman grid->npcol = npcol; 1725d24d4204SJose E. Roman grid->myrow = myrow; 1726d24d4204SJose E. Roman grid->mycol = mycol; 1727d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1728f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1729d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1730f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1731d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol,"R",size,1); 17329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid)); 1733d24d4204SJose E. Roman 1734d24d4204SJose E. Roman } else grid->grid_refct++; 17359566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm)); 1736d24d4204SJose E. Roman a->grid = grid; 1737d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1738d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1739d24d4204SJose E. Roman 17409566063dSJacob Faibussowitsch ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");PetscCall(ierr); 17419566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg)); 1742d24d4204SJose E. Roman if (flg) { 1743d24d4204SJose E. Roman a->mb = array[0]; 1744d24d4204SJose E. Roman a->nb = (k>1)? array[1]: a->mb; 1745d24d4204SJose E. Roman } 17469566063dSJacob Faibussowitsch ierr = PetscOptionsEnd();PetscCall(ierr); 1747d24d4204SJose E. Roman 17489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK)); 17499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK)); 17509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK)); 17519566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK)); 1752d24d4204SJose E. Roman PetscFunctionReturn(0); 1753d24d4204SJose E. Roman } 1754d24d4204SJose E. Roman 1755d24d4204SJose E. Roman /*@C 1756d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1757d24d4204SJose E. Roman (2D block cyclic distribution). 1758d24d4204SJose E. Roman 1759d24d4204SJose E. Roman Collective 1760d24d4204SJose E. Roman 1761d24d4204SJose E. Roman Input Parameters: 1762d24d4204SJose E. Roman + comm - MPI communicator 1763d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1764d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1765d24d4204SJose E. Roman . M - number of global rows 1766d24d4204SJose E. Roman . N - number of global columns 1767d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1768d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1769d24d4204SJose E. Roman 1770d24d4204SJose E. Roman Output Parameter: 1771d24d4204SJose E. Roman . A - the matrix 1772d24d4204SJose E. Roman 1773d24d4204SJose E. Roman Options Database Keys: 1774d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1775d24d4204SJose E. Roman 1776d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1777d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1778d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1779d24d4204SJose E. Roman 1780d24d4204SJose E. Roman Notes: 1781d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1782d24d4204SJose E. Roman is chosen. 1783d24d4204SJose E. Roman 1784d24d4204SJose E. Roman Storage Information: 1785d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1786d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1787d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1788d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1789d24d4204SJose E. Roman 1790d24d4204SJose E. Roman Level: intermediate 1791d24d4204SJose E. Roman 1792d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1793d24d4204SJose E. Roman @*/ 1794d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1795d24d4204SJose E. Roman { 1796d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1797d24d4204SJose E. Roman PetscInt m,n; 1798d24d4204SJose E. Roman 1799d24d4204SJose E. Roman PetscFunctionBegin; 18009566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 18019566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATSCALAPACK)); 18022c71b3e2SJacob Faibussowitsch PetscCheckFalse(M==PETSC_DECIDE || N==PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1803d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1804d24d4204SJose E. Roman m = PETSC_DECIDE; 18059566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 1806d24d4204SJose E. Roman n = PETSC_DECIDE; 18079566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 18089566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,M,N)); 1809d24d4204SJose E. Roman a = (Mat_ScaLAPACK*)(*A)->data; 18109566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M,&a->M)); 18119566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N,&a->N)); 18129566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 18139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 18149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc,&a->rsrc)); 18159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc,&a->csrc)); 18169566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A)); 1817d24d4204SJose E. Roman PetscFunctionReturn(0); 1818d24d4204SJose E. Roman } 1819