1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23f7ec113fSDamian Marek { 24f7ec113fSDamian Marek PetscFunctionBegin; 255f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n")); 265f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27f7ec113fSDamian Marek PetscFunctionReturn(0); 28f7ec113fSDamian Marek } 29f7ec113fSDamian Marek 30d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 31d24d4204SJose E. Roman { 32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 33d24d4204SJose E. Roman PetscBool iascii; 34d24d4204SJose E. Roman PetscViewerFormat format; 35d24d4204SJose E. Roman Mat Adense; 36d24d4204SJose E. Roman 37d24d4204SJose E. Roman PetscFunctionBegin; 385f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 39d24d4204SJose E. Roman if (iascii) { 405f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerGetFormat(viewer,&format)); 41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 425f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb)); 435f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol)); 445f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc)); 455f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc)); 46d24d4204SJose E. Roman PetscFunctionReturn(0); 47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48d24d4204SJose E. Roman PetscFunctionReturn(0); 49d24d4204SJose E. Roman } 50d24d4204SJose E. Roman } 51d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 525f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense)); 535f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(Adense,viewer)); 545f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Adense)); 55d24d4204SJose E. Roman PetscFunctionReturn(0); 56d24d4204SJose E. Roman } 57d24d4204SJose E. Roman 58d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 59d24d4204SJose E. Roman { 60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 61d24d4204SJose E. Roman PetscLogDouble isend[2],irecv[2]; 62d24d4204SJose E. Roman 63d24d4204SJose E. Roman PetscFunctionBegin; 64d24d4204SJose E. Roman info->block_size = 1.0; 65d24d4204SJose E. Roman 66d24d4204SJose E. Roman isend[0] = a->lld*a->locc; /* locally allocated */ 67d24d4204SJose E. Roman isend[1] = a->locr*a->locc; /* used submatrix */ 68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69d24d4204SJose E. Roman info->nz_allocated = isend[0]; 70d24d4204SJose E. Roman info->nz_used = isend[1]; 71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 725f80ce2aSJacob Faibussowitsch CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A))); 73d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 74d24d4204SJose E. Roman info->nz_used = irecv[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 765f80ce2aSJacob Faibussowitsch CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A))); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } 80d24d4204SJose E. Roman 81d24d4204SJose E. Roman info->nz_unneeded = 0; 82d24d4204SJose E. Roman info->assemblies = A->num_ass; 83d24d4204SJose E. Roman info->mallocs = 0; 84d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 85d24d4204SJose E. Roman info->fill_ratio_given = 0; 86d24d4204SJose E. Roman info->fill_ratio_needed = 0; 87d24d4204SJose E. Roman info->factor_mallocs = 0; 88d24d4204SJose E. Roman PetscFunctionReturn(0); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 91d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 92d24d4204SJose E. Roman { 93d24d4204SJose E. Roman PetscFunctionBegin; 94d24d4204SJose E. Roman switch (op) { 95d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 96d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 97d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 98d24d4204SJose E. Roman case MAT_SYMMETRIC: 99d24d4204SJose E. Roman case MAT_SORTED_FULL: 100d24d4204SJose E. Roman case MAT_HERMITIAN: 101d24d4204SJose E. Roman break; 102d24d4204SJose E. Roman default: 10398921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 104d24d4204SJose E. Roman } 105d24d4204SJose E. Roman PetscFunctionReturn(0); 106d24d4204SJose E. Roman } 107d24d4204SJose E. Roman 108d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 109d24d4204SJose E. Roman { 110d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 111d24d4204SJose E. Roman PetscInt i,j; 112d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 113d24d4204SJose E. Roman 114d24d4204SJose E. Roman PetscFunctionBegin; 115d24d4204SJose E. Roman for (i=0;i<nr;i++) { 116d24d4204SJose E. Roman if (rows[i] < 0) continue; 1175f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(rows[i]+1,&gridx)); 118d24d4204SJose E. Roman for (j=0;j<nc;j++) { 119d24d4204SJose E. Roman if (cols[j] < 0) continue; 1205f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(cols[j]+1,&gcidx)); 121d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 122d24d4204SJose E. Roman if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 123d24d4204SJose E. Roman switch (imode) { 124d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 125d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 12698921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 127d24d4204SJose E. Roman } 128d24d4204SJose E. Roman } else { 129*28b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 130d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 1315f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES))); 132d24d4204SJose E. Roman } 133d24d4204SJose E. Roman } 134d24d4204SJose E. Roman } 135d24d4204SJose E. Roman PetscFunctionReturn(0); 136d24d4204SJose E. Roman } 137d24d4204SJose E. Roman 138d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 139d24d4204SJose E. Roman { 140d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 141d24d4204SJose E. Roman PetscScalar *x2d,*y2d,alpha=1.0; 142d24d4204SJose E. Roman const PetscInt *ranges; 143d24d4204SJose E. Roman PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 144d24d4204SJose E. Roman 145d24d4204SJose E. Roman PetscFunctionBegin; 146d24d4204SJose E. Roman if (transpose) { 147d24d4204SJose E. Roman 148d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1495f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 1505f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 151d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 152d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 153d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1545f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 1555f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* y block size */ 156d24d4204SJose E. Roman ylld = 1; 157d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 158d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 159d24d4204SJose E. Roman 160d24d4204SJose E. Roman /* allocate 2d vectors */ 161d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 162d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1635f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 164d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 167d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 168d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 169d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 170d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 171d24d4204SJose E. Roman 172d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 173d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 174d24d4204SJose E. Roman 175d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 176d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 177d24d4204SJose E. Roman 178d24d4204SJose E. Roman /* call PBLAS subroutine */ 179d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 180d24d4204SJose E. Roman 181d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 182d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 183d24d4204SJose E. Roman 184d24d4204SJose E. Roman } else { /* non-transpose */ 185d24d4204SJose E. Roman 186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 1875f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 1885f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* x block size */ 189d24d4204SJose E. Roman xlld = 1; 190d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 191d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1925f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 1935f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* y block size */ 194d24d4204SJose E. Roman ylld = PetscMax(1,A->rmap->n); 195d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 196d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* allocate 2d vectors */ 199d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 200d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 2015f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 202d24d4204SJose E. Roman ylld = PetscMax(1,lszy); 203d24d4204SJose E. Roman 204d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 205d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 206d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 207d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 208d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 209d24d4204SJose E. Roman 210d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 211d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 212d24d4204SJose E. Roman 213d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 214d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 215d24d4204SJose E. Roman 216d24d4204SJose E. Roman /* call PBLAS subroutine */ 217d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 218d24d4204SJose E. Roman 219d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 220d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 221d24d4204SJose E. Roman 222d24d4204SJose E. Roman } 2235f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(x2d,y2d)); 224d24d4204SJose E. Roman PetscFunctionReturn(0); 225d24d4204SJose E. Roman } 226d24d4204SJose E. Roman 227d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 228d24d4204SJose E. Roman { 229d24d4204SJose E. Roman const PetscScalar *xarray; 230d24d4204SJose E. Roman PetscScalar *yarray; 231d24d4204SJose E. Roman 232d24d4204SJose E. Roman PetscFunctionBegin; 2335f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(x,&xarray)); 2345f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(y,&yarray)); 2355f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray)); 2365f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(x,&xarray)); 2375f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(y,&yarray)); 238d24d4204SJose E. Roman PetscFunctionReturn(0); 239d24d4204SJose E. Roman } 240d24d4204SJose E. Roman 241d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 242d24d4204SJose E. Roman { 243d24d4204SJose E. Roman const PetscScalar *xarray; 244d24d4204SJose E. Roman PetscScalar *yarray; 245d24d4204SJose E. Roman 246d24d4204SJose E. Roman PetscFunctionBegin; 2475f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(x,&xarray)); 2485f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(y,&yarray)); 2495f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray)); 2505f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(x,&xarray)); 2515f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(y,&yarray)); 252d24d4204SJose E. Roman PetscFunctionReturn(0); 253d24d4204SJose E. Roman } 254d24d4204SJose E. Roman 255d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 256d24d4204SJose E. Roman { 257d24d4204SJose E. Roman const PetscScalar *xarray; 258d24d4204SJose E. Roman PetscScalar *zarray; 259d24d4204SJose E. Roman 260d24d4204SJose E. Roman PetscFunctionBegin; 2615f80ce2aSJacob Faibussowitsch if (y != z) CHKERRQ(VecCopy(y,z)); 2625f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(x,&xarray)); 2635f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(z,&zarray)); 2645f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray)); 2655f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(x,&xarray)); 2665f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(z,&zarray)); 267d24d4204SJose E. Roman PetscFunctionReturn(0); 268d24d4204SJose E. Roman } 269d24d4204SJose E. Roman 270d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 271d24d4204SJose E. Roman { 272d24d4204SJose E. Roman const PetscScalar *xarray; 273d24d4204SJose E. Roman PetscScalar *zarray; 274d24d4204SJose E. Roman 275d24d4204SJose E. Roman PetscFunctionBegin; 2765f80ce2aSJacob Faibussowitsch if (y != z) CHKERRQ(VecCopy(y,z)); 2775f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(x,&xarray)); 2785f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(z,&zarray)); 2795f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray)); 2805f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(x,&xarray)); 2815f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(z,&zarray)); 282d24d4204SJose E. Roman PetscFunctionReturn(0); 283d24d4204SJose E. Roman } 284d24d4204SJose E. Roman 285d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 286d24d4204SJose E. Roman { 287d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 288d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 289d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 290d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 291d24d4204SJose E. Roman PetscBLASInt one=1; 292d24d4204SJose E. Roman 293d24d4204SJose E. Roman PetscFunctionBegin; 294d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 295d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 296d24d4204SJose E. Roman PetscFunctionReturn(0); 297d24d4204SJose E. Roman } 298d24d4204SJose E. Roman 299d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 300d24d4204SJose E. Roman { 301d24d4204SJose E. Roman PetscFunctionBegin; 3025f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 3035f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(C,MATSCALAPACK)); 3045f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(C)); 305d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 306d24d4204SJose E. Roman PetscFunctionReturn(0); 307d24d4204SJose E. Roman } 308d24d4204SJose E. Roman 309d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 310d24d4204SJose E. Roman { 311d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 312d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 313d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 314d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 315d24d4204SJose E. Roman PetscBLASInt one=1; 316d24d4204SJose E. Roman 317d24d4204SJose E. Roman PetscFunctionBegin; 318d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 319d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 320d24d4204SJose E. Roman PetscFunctionReturn(0); 321d24d4204SJose E. Roman } 322d24d4204SJose E. Roman 323d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 324d24d4204SJose E. Roman { 325d24d4204SJose E. Roman PetscFunctionBegin; 3265f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE)); 3275f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(C,MATSCALAPACK)); 3285f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(C)); 329d24d4204SJose E. Roman PetscFunctionReturn(0); 330d24d4204SJose E. Roman } 331d24d4204SJose E. Roman 332d24d4204SJose E. Roman /* --------------------------------------- */ 333d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 334d24d4204SJose E. Roman { 335d24d4204SJose E. Roman PetscFunctionBegin; 336d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 337d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 338d24d4204SJose E. Roman PetscFunctionReturn(0); 339d24d4204SJose E. Roman } 340d24d4204SJose E. Roman 341d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 342d24d4204SJose E. Roman { 343d24d4204SJose E. Roman PetscFunctionBegin; 344d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 345d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 346d24d4204SJose E. Roman PetscFunctionReturn(0); 347d24d4204SJose E. Roman } 348d24d4204SJose E. Roman 349d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 350d24d4204SJose E. Roman { 351d24d4204SJose E. Roman Mat_Product *product = C->product; 352d24d4204SJose E. Roman 353d24d4204SJose E. Roman PetscFunctionBegin; 354d24d4204SJose E. Roman switch (product->type) { 355d24d4204SJose E. Roman case MATPRODUCT_AB: 3565f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_ScaLAPACK_AB(C)); 357d24d4204SJose E. Roman break; 358d24d4204SJose E. Roman case MATPRODUCT_ABt: 3595f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 360d24d4204SJose E. Roman break; 36198921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 362d24d4204SJose E. Roman } 363d24d4204SJose E. Roman PetscFunctionReturn(0); 364d24d4204SJose E. Roman } 365d24d4204SJose E. Roman /* --------------------------------------- */ 366d24d4204SJose E. Roman 367d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 368d24d4204SJose E. Roman { 369d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 370d24d4204SJose E. Roman PetscScalar *darray,*d2d,v; 371d24d4204SJose E. Roman const PetscInt *ranges; 372d24d4204SJose E. Roman PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 373d24d4204SJose E. Roman 374d24d4204SJose E. Roman PetscFunctionBegin; 3755f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(D,&darray)); 376d24d4204SJose E. Roman 377d24d4204SJose E. Roman if (A->rmap->N<=A->cmap->N) { /* row version */ 378d24d4204SJose E. Roman 379d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 3805f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 3815f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 382d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 383d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 384d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 385d24d4204SJose E. Roman 386d24d4204SJose E. Roman /* allocate 2d vector */ 387d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 3885f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(lszd,&d2d)); 389d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 390d24d4204SJose E. Roman 391d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 392d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 393d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 394d24d4204SJose E. Roman 395d24d4204SJose E. Roman /* collect diagonal */ 396d24d4204SJose E. Roman for (j=1;j<=a->M;j++) { 397d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 398d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 399d24d4204SJose E. Roman } 400d24d4204SJose E. Roman 401d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 402d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 4035f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(d2d)); 404d24d4204SJose E. Roman 405d24d4204SJose E. Roman } else { /* column version */ 406d24d4204SJose E. Roman 407d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4085f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 4095f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 410d24d4204SJose E. Roman dlld = 1; 411d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 412d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 413d24d4204SJose E. Roman 414d24d4204SJose E. Roman /* allocate 2d vector */ 415d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 4165f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(lszd,&d2d)); 417d24d4204SJose E. Roman 418d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 419d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 420d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 421d24d4204SJose E. Roman 422d24d4204SJose E. Roman /* collect diagonal */ 423d24d4204SJose E. Roman for (j=1;j<=a->N;j++) { 424d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 425d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 426d24d4204SJose E. Roman } 427d24d4204SJose E. Roman 428d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 429d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 4305f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(d2d)); 431d24d4204SJose E. Roman } 432d24d4204SJose E. Roman 4335f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(D,&darray)); 4345f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyBegin(D)); 4355f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyEnd(D)); 436d24d4204SJose E. Roman PetscFunctionReturn(0); 437d24d4204SJose E. Roman } 438d24d4204SJose E. Roman 439d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 440d24d4204SJose E. Roman { 441d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 442d24d4204SJose E. Roman const PetscScalar *d; 443d24d4204SJose E. Roman const PetscInt *ranges; 444d24d4204SJose E. Roman PetscScalar *d2d; 445d24d4204SJose E. Roman PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 446d24d4204SJose E. Roman 447d24d4204SJose E. Roman PetscFunctionBegin; 448d24d4204SJose E. Roman if (R) { 4495f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(R,(const PetscScalar **)&d)); 450d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4515f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 4525f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 453d24d4204SJose E. Roman dlld = 1; 454d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 455d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 456d24d4204SJose E. Roman 457d24d4204SJose E. Roman /* allocate 2d vector */ 458d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 4595f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(lszd,&d2d)); 460d24d4204SJose E. Roman 461d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 462d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 463d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 464d24d4204SJose E. Roman 465d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 466d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 467d24d4204SJose E. Roman 468d24d4204SJose E. Roman /* broadcast along process columns */ 469d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 470d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 471d24d4204SJose E. Roman 472d24d4204SJose E. Roman /* local scaling */ 473d24d4204SJose E. Roman for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 474d24d4204SJose E. Roman 4755f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(d2d)); 4765f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(R,(const PetscScalar **)&d)); 477d24d4204SJose E. Roman } 478d24d4204SJose E. Roman if (L) { 4795f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(L,(const PetscScalar **)&d)); 480d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 4815f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 4825f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 483d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 484d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 485d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 486d24d4204SJose E. Roman 487d24d4204SJose E. Roman /* allocate 2d vector */ 488d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 4895f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(lszd,&d2d)); 490d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 491d24d4204SJose E. Roman 492d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 493d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 494d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 495d24d4204SJose E. Roman 496d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 497d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 498d24d4204SJose E. Roman 499d24d4204SJose E. Roman /* broadcast along process rows */ 500d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 501d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 502d24d4204SJose E. Roman 503d24d4204SJose E. Roman /* local scaling */ 504d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 505d24d4204SJose E. Roman 5065f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(d2d)); 5075f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(L,(const PetscScalar **)&d)); 508d24d4204SJose E. Roman } 509d24d4204SJose E. Roman PetscFunctionReturn(0); 510d24d4204SJose E. Roman } 511d24d4204SJose E. Roman 512d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 513d24d4204SJose E. Roman { 514d24d4204SJose E. Roman PetscFunctionBegin; 515d24d4204SJose E. Roman *missing = PETSC_FALSE; 516d24d4204SJose E. Roman PetscFunctionReturn(0); 517d24d4204SJose E. Roman } 518d24d4204SJose E. Roman 519d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 520d24d4204SJose E. Roman { 521d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 522d24d4204SJose E. Roman PetscBLASInt n,one=1; 523d24d4204SJose E. Roman 524d24d4204SJose E. Roman PetscFunctionBegin; 525d24d4204SJose E. Roman n = x->lld*x->locc; 526d24d4204SJose E. Roman PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 527d24d4204SJose E. Roman PetscFunctionReturn(0); 528d24d4204SJose E. Roman } 529d24d4204SJose E. Roman 530d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 531d24d4204SJose E. Roman { 532d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 533d24d4204SJose E. Roman PetscBLASInt i,n; 534d24d4204SJose E. Roman PetscScalar v; 535d24d4204SJose E. Roman 536d24d4204SJose E. Roman PetscFunctionBegin; 537d24d4204SJose E. Roman n = PetscMin(x->M,x->N); 538d24d4204SJose E. Roman for (i=1;i<=n;i++) { 539d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 540d24d4204SJose E. Roman v += alpha; 541d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 542d24d4204SJose E. Roman } 543d24d4204SJose E. Roman PetscFunctionReturn(0); 544d24d4204SJose E. Roman } 545d24d4204SJose E. Roman 546d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 547d24d4204SJose E. Roman { 548d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 549d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 550d24d4204SJose E. Roman PetscBLASInt one=1; 551d24d4204SJose E. Roman PetscScalar beta=1.0; 552d24d4204SJose E. Roman 553d24d4204SJose E. Roman PetscFunctionBegin; 554d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y,1,X,3); 555d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 5565f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateIncrease((PetscObject)Y)); 557d24d4204SJose E. Roman PetscFunctionReturn(0); 558d24d4204SJose E. Roman } 559d24d4204SJose E. Roman 560d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 561d24d4204SJose E. Roman { 562d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 563d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 564d24d4204SJose E. Roman 565d24d4204SJose E. Roman PetscFunctionBegin; 5665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 5675f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateIncrease((PetscObject)B)); 568d24d4204SJose E. Roman PetscFunctionReturn(0); 569d24d4204SJose E. Roman } 570d24d4204SJose E. Roman 571d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 572d24d4204SJose E. Roman { 573d24d4204SJose E. Roman Mat Bs; 574d24d4204SJose E. Roman MPI_Comm comm; 575d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 576d24d4204SJose E. Roman 577d24d4204SJose E. Roman PetscFunctionBegin; 5785f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 5795f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,&Bs)); 5805f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 5815f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(Bs,MATSCALAPACK)); 582d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 583d24d4204SJose E. Roman b->M = a->M; 584d24d4204SJose E. Roman b->N = a->N; 585d24d4204SJose E. Roman b->mb = a->mb; 586d24d4204SJose E. Roman b->nb = a->nb; 587d24d4204SJose E. Roman b->rsrc = a->rsrc; 588d24d4204SJose E. Roman b->csrc = a->csrc; 5895f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(Bs)); 590d24d4204SJose E. Roman *B = Bs; 591d24d4204SJose E. Roman if (op == MAT_COPY_VALUES) { 5925f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 593d24d4204SJose E. Roman } 594d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 595d24d4204SJose E. Roman PetscFunctionReturn(0); 596d24d4204SJose E. Roman } 597d24d4204SJose E. Roman 598d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 599d24d4204SJose E. Roman { 600d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601d24d4204SJose E. Roman Mat Bs = *B; 602d24d4204SJose E. Roman PetscBLASInt one=1; 603d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 604d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 605d24d4204SJose E. Roman PetscInt i,j; 606d24d4204SJose E. Roman #endif 607d24d4204SJose E. Roman 608d24d4204SJose E. Roman PetscFunctionBegin; 609d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6105f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 611d24d4204SJose E. Roman *B = Bs; 612d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 613d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 614d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 615d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 616d24d4204SJose E. Roman /* undo conjugation */ 617d24d4204SJose E. Roman for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 618d24d4204SJose E. Roman #endif 619d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 620d24d4204SJose E. Roman PetscFunctionReturn(0); 621d24d4204SJose E. Roman } 622d24d4204SJose E. Roman 623d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 624d24d4204SJose E. Roman { 625d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 626d24d4204SJose E. Roman PetscInt i,j; 627d24d4204SJose E. Roman 628d24d4204SJose E. Roman PetscFunctionBegin; 629d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 630d24d4204SJose E. Roman PetscFunctionReturn(0); 631d24d4204SJose E. Roman } 632d24d4204SJose E. Roman 633d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 634d24d4204SJose E. Roman { 635d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 636d24d4204SJose E. Roman Mat Bs = *B; 637d24d4204SJose E. Roman PetscBLASInt one=1; 638d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 639d24d4204SJose E. Roman 640d24d4204SJose E. Roman PetscFunctionBegin; 641d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 6425f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 643d24d4204SJose E. Roman *B = Bs; 644d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 645d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 646d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 647d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 648d24d4204SJose E. Roman PetscFunctionReturn(0); 649d24d4204SJose E. Roman } 650d24d4204SJose E. Roman 651d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 652d24d4204SJose E. Roman { 653d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 654d24d4204SJose E. Roman PetscScalar *x,*x2d; 655d24d4204SJose E. Roman const PetscInt *ranges; 656d24d4204SJose E. Roman PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 657d24d4204SJose E. Roman 658d24d4204SJose E. Roman PetscFunctionBegin; 6595f80ce2aSJacob Faibussowitsch CHKERRQ(VecCopy(B,X)); 6605f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(X,&x)); 661d24d4204SJose E. Roman 662d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 6635f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 6645f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 665d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 666d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 667d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 668d24d4204SJose E. Roman 669d24d4204SJose E. Roman /* allocate 2d vector */ 670d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 6715f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(lszx,&x2d)); 672d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 673d24d4204SJose E. Roman 674d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 675d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 676d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 677d24d4204SJose E. Roman 678d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 679d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 680d24d4204SJose E. Roman 681d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 682d24d4204SJose E. Roman switch (A->factortype) { 683d24d4204SJose E. Roman case MAT_FACTOR_LU: 684d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 685d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 686d24d4204SJose E. Roman break; 687d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 688d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 689d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 690d24d4204SJose E. Roman break; 691d24d4204SJose E. Roman default: 692d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 693d24d4204SJose E. Roman } 694d24d4204SJose E. Roman 695d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 696d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 697d24d4204SJose E. Roman 6985f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(x2d)); 6995f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(X,&x)); 700d24d4204SJose E. Roman PetscFunctionReturn(0); 701d24d4204SJose E. Roman } 702d24d4204SJose E. Roman 703d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 704d24d4204SJose E. Roman { 705d24d4204SJose E. Roman PetscFunctionBegin; 7065f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolve_ScaLAPACK(A,B,X)); 7075f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(X,1,Y)); 708d24d4204SJose E. Roman PetscFunctionReturn(0); 709d24d4204SJose E. Roman } 710d24d4204SJose E. Roman 711d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 712d24d4204SJose E. Roman { 713d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 714d24d4204SJose E. Roman PetscBool flg1,flg2; 715d24d4204SJose E. Roman PetscBLASInt one=1,info; 716d24d4204SJose E. Roman 717d24d4204SJose E. Roman PetscFunctionBegin; 7185f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1)); 7195f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2)); 7202c71b3e2SJacob Faibussowitsch PetscCheckFalse(!(flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 721d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B,1,X,2); 722d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)B->data; 723d24d4204SJose E. Roman x = (Mat_ScaLAPACK*)X->data; 7245f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(x->loc,b->loc,b->lld*b->locc)); 725d24d4204SJose E. Roman 726d24d4204SJose E. Roman switch (A->factortype) { 727d24d4204SJose E. Roman case MAT_FACTOR_LU: 728d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 729d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 730d24d4204SJose E. Roman break; 731d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 732d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 733d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 734d24d4204SJose E. Roman break; 735d24d4204SJose E. Roman default: 736d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 737d24d4204SJose E. Roman } 738d24d4204SJose E. Roman PetscFunctionReturn(0); 739d24d4204SJose E. Roman } 740d24d4204SJose E. Roman 741d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 742d24d4204SJose E. Roman { 743d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 744d24d4204SJose E. Roman PetscBLASInt one=1,info; 745d24d4204SJose E. Roman 746d24d4204SJose E. Roman PetscFunctionBegin; 747d24d4204SJose E. Roman if (!a->pivots) { 7485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(a->locr+a->mb,&a->pivots)); 7495f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt))); 750d24d4204SJose E. Roman } 751d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 752d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf",info); 753d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 754d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 755d24d4204SJose E. Roman 7565f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(A->solvertype)); 7575f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 758d24d4204SJose E. Roman PetscFunctionReturn(0); 759d24d4204SJose E. Roman } 760d24d4204SJose E. Roman 761d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 762d24d4204SJose E. Roman { 763d24d4204SJose E. Roman PetscFunctionBegin; 7645f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN)); 7655f80ce2aSJacob Faibussowitsch CHKERRQ(MatLUFactor_ScaLAPACK(F,0,0,info)); 766d24d4204SJose E. Roman PetscFunctionReturn(0); 767d24d4204SJose E. Roman } 768d24d4204SJose E. Roman 769d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 770d24d4204SJose E. Roman { 771d24d4204SJose E. Roman PetscFunctionBegin; 772d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 773d24d4204SJose E. Roman PetscFunctionReturn(0); 774d24d4204SJose E. Roman } 775d24d4204SJose E. Roman 776d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 777d24d4204SJose E. Roman { 778d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 779d24d4204SJose E. Roman PetscBLASInt one=1,info; 780d24d4204SJose E. Roman 781d24d4204SJose E. Roman PetscFunctionBegin; 782d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 783d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf",info); 784d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 785d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 786d24d4204SJose E. Roman 7875f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(A->solvertype)); 7885f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 789d24d4204SJose E. Roman PetscFunctionReturn(0); 790d24d4204SJose E. Roman } 791d24d4204SJose E. Roman 792d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 793d24d4204SJose E. Roman { 794d24d4204SJose E. Roman PetscFunctionBegin; 7955f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN)); 7965f80ce2aSJacob Faibussowitsch CHKERRQ(MatCholeskyFactor_ScaLAPACK(F,0,info)); 797d24d4204SJose E. Roman PetscFunctionReturn(0); 798d24d4204SJose E. Roman } 799d24d4204SJose E. Roman 800d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 801d24d4204SJose E. Roman { 802d24d4204SJose E. Roman PetscFunctionBegin; 803d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 804d24d4204SJose E. Roman PetscFunctionReturn(0); 805d24d4204SJose E. Roman } 806d24d4204SJose E. Roman 807d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 808d24d4204SJose E. Roman { 809d24d4204SJose E. Roman PetscFunctionBegin; 810d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 811d24d4204SJose E. Roman PetscFunctionReturn(0); 812d24d4204SJose E. Roman } 813d24d4204SJose E. Roman 814d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 815d24d4204SJose E. Roman { 816d24d4204SJose E. Roman Mat B; 81759172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 818d24d4204SJose E. Roman 819d24d4204SJose E. Roman PetscFunctionBegin; 820d24d4204SJose E. Roman /* Create the factorization matrix */ 8215f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B)); 82266e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 823d24d4204SJose E. Roman B->factortype = ftype; 8245f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(B->solvertype)); 8255f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype)); 826d24d4204SJose E. Roman 8275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack)); 828d24d4204SJose E. Roman *F = B; 829d24d4204SJose E. Roman PetscFunctionReturn(0); 830d24d4204SJose E. Roman } 831d24d4204SJose E. Roman 832d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 833d24d4204SJose E. Roman { 834d24d4204SJose E. Roman PetscFunctionBegin; 8355f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack)); 8365f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack)); 837d24d4204SJose E. Roman PetscFunctionReturn(0); 838d24d4204SJose E. Roman } 839d24d4204SJose E. Roman 840d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 841d24d4204SJose E. Roman { 842d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 843d24d4204SJose E. Roman PetscBLASInt one=1,lwork=0; 844d24d4204SJose E. Roman const char *ntype; 845d68f4f38SPierre Jolivet PetscScalar *work=NULL,dummy; 846d24d4204SJose E. Roman 847d24d4204SJose E. Roman PetscFunctionBegin; 848d24d4204SJose E. Roman switch (type) { 849d24d4204SJose E. Roman case NORM_1: 850d24d4204SJose E. Roman ntype = "1"; 851d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 852d24d4204SJose E. Roman break; 853d24d4204SJose E. Roman case NORM_FROBENIUS: 854d24d4204SJose E. Roman ntype = "F"; 855d24d4204SJose E. Roman work = &dummy; 856d24d4204SJose E. Roman break; 857d24d4204SJose E. Roman case NORM_INFINITY: 858d24d4204SJose E. Roman ntype = "I"; 859d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 860d24d4204SJose E. Roman break; 861d24d4204SJose E. Roman default: 862d24d4204SJose E. Roman SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 863d24d4204SJose E. Roman } 8645f80ce2aSJacob Faibussowitsch if (lwork) CHKERRQ(PetscMalloc1(lwork,&work)); 865d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 8665f80ce2aSJacob Faibussowitsch if (lwork) CHKERRQ(PetscFree(work)); 867d24d4204SJose E. Roman PetscFunctionReturn(0); 868d24d4204SJose E. Roman } 869d24d4204SJose E. Roman 870d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 871d24d4204SJose E. Roman { 872d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 873d24d4204SJose E. Roman 874d24d4204SJose E. Roman PetscFunctionBegin; 8755f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(a->loc,a->lld*a->locc)); 876d24d4204SJose E. Roman PetscFunctionReturn(0); 877d24d4204SJose E. Roman } 878d24d4204SJose E. Roman 879d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 880d24d4204SJose E. Roman { 881d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 882d24d4204SJose E. Roman PetscInt i,n,nb,isrc,nproc,iproc,*idx; 883d24d4204SJose E. Roman 884d24d4204SJose E. Roman PetscFunctionBegin; 885d24d4204SJose E. Roman if (rows) { 886d24d4204SJose E. Roman n = a->locr; 887d24d4204SJose E. Roman nb = a->mb; 888d24d4204SJose E. Roman isrc = a->rsrc; 889d24d4204SJose E. Roman nproc = a->grid->nprow; 890d24d4204SJose E. Roman iproc = a->grid->myrow; 8915f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n,&idx)); 892d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 8935f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows)); 894d24d4204SJose E. Roman } 895d24d4204SJose E. Roman if (cols) { 896d24d4204SJose E. Roman n = a->locc; 897d24d4204SJose E. Roman nb = a->nb; 898d24d4204SJose E. Roman isrc = a->csrc; 899d24d4204SJose E. Roman nproc = a->grid->npcol; 900d24d4204SJose E. Roman iproc = a->grid->mycol; 9015f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n,&idx)); 902d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 9035f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols)); 904d24d4204SJose E. Roman } 905d24d4204SJose E. Roman PetscFunctionReturn(0); 906d24d4204SJose E. Roman } 907d24d4204SJose E. Roman 908d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 909d24d4204SJose E. Roman { 910d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 911d24d4204SJose E. Roman Mat Bmpi; 912d24d4204SJose E. Roman MPI_Comm comm; 9134b1a79daSJose E. Roman PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 9144b1a79daSJose E. Roman const PetscInt *ranges,*branges,*cwork; 9154b1a79daSJose E. Roman const PetscScalar *vwork; 916d24d4204SJose E. Roman PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 917d24d4204SJose E. Roman PetscScalar *barray; 9184b1a79daSJose E. Roman PetscBool differ=PETSC_FALSE; 9194b1a79daSJose E. Roman PetscMPIInt size; 920d24d4204SJose E. Roman 921d24d4204SJose E. Roman PetscFunctionBegin; 9225f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 9235f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 9244b1a79daSJose E. Roman 9254b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 9265f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(comm,&size)); 9275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges((*B)->rmap,&branges)); 9284b1a79daSJose E. Roman for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 9294b1a79daSJose E. Roman } 9304b1a79daSJose E. Roman 9314b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9325f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,&Bmpi)); 9334b1a79daSJose E. Roman m = PETSC_DECIDE; 9345f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 9354b1a79daSJose E. Roman n = PETSC_DECIDE; 9365f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 9375f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(Bmpi,m,n,M,N)); 9385f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(Bmpi,MATDENSE)); 9395f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(Bmpi)); 9404b1a79daSJose E. Roman 9414b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9425f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 9434b1a79daSJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 9444b1a79daSJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 9454b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit",info); 9464b1a79daSJose E. Roman 9474b1a79daSJose E. Roman /* redistribute matrix */ 9485f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(Bmpi,&barray)); 9494b1a79daSJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 9505f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(Bmpi,&barray)); 9515f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 9525f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 9534b1a79daSJose E. Roman 9544b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9555f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(Bmpi,&rstart,&rend)); 9564b1a79daSJose E. Roman for (i=rstart;i<rend;i++) { 9575f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetRow(Bmpi,i,&nz,&cwork,&vwork)); 9585f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES)); 9595f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork)); 9604b1a79daSJose E. Roman } 9615f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY)); 9625f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY)); 9635f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Bmpi)); 9644b1a79daSJose E. Roman 9654b1a79daSJose E. Roman } else { /* normal cases */ 966d24d4204SJose E. Roman 967d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 968d24d4204SJose E. Roman else { 9695f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,&Bmpi)); 97092c846b4SJose E. Roman m = PETSC_DECIDE; 9715f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 97292c846b4SJose E. Roman n = PETSC_DECIDE; 9735f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 9745f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(Bmpi,m,n,M,N)); 9755f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(Bmpi,MATDENSE)); 9765f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(Bmpi)); 977d24d4204SJose E. Roman } 978d24d4204SJose E. Roman 979d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9805f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 981d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 982d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 983d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 984d24d4204SJose E. Roman 985d24d4204SJose E. Roman /* redistribute matrix */ 9865f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(Bmpi,&barray)); 987d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 9885f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(Bmpi,&barray)); 989d24d4204SJose E. Roman 9905f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 9915f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 992d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 9935f80ce2aSJacob Faibussowitsch CHKERRQ(MatHeaderReplace(A,&Bmpi)); 994d24d4204SJose E. Roman } else *B = Bmpi; 9954b1a79daSJose E. Roman } 996d24d4204SJose E. Roman PetscFunctionReturn(0); 997d24d4204SJose E. Roman } 998d24d4204SJose E. Roman 999d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1000d24d4204SJose E. Roman { 1001d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1002d24d4204SJose E. Roman Mat Bmpi; 1003d24d4204SJose E. Roman MPI_Comm comm; 100492c846b4SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1005d24d4204SJose E. Roman const PetscInt *ranges; 1006d24d4204SJose E. Roman PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1007d24d4204SJose E. Roman PetscScalar *aarray; 10084e8b52a1SJose E. Roman PetscInt lda; 1009d24d4204SJose E. Roman 1010d24d4204SJose E. Roman PetscFunctionBegin; 10115f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 1012d24d4204SJose E. Roman 1013d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1014d24d4204SJose E. Roman else { 10155f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,&Bmpi)); 101692c846b4SJose E. Roman m = PETSC_DECIDE; 10175f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 101892c846b4SJose E. Roman n = PETSC_DECIDE; 10195f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 10205f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(Bmpi,m,n,M,N)); 10215f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(Bmpi,MATSCALAPACK)); 10225f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(Bmpi)); 1023d24d4204SJose E. Roman } 1024d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bmpi->data; 1025d24d4204SJose E. Roman 1026d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 10275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 10285f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(ranges[1],&amb)); /* row block size */ 10295f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetLDA(A,&lda)); 10304e8b52a1SJose E. Roman lld = PetscMax(lda,1); /* local leading dimension */ 1031d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1032d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1033d24d4204SJose E. Roman 1034d24d4204SJose E. Roman /* redistribute matrix */ 10355f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(A,&aarray)); 1036d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 10375f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(A,&aarray)); 1038d24d4204SJose E. Roman 10395f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 10405f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1041d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 10425f80ce2aSJacob Faibussowitsch CHKERRQ(MatHeaderReplace(A,&Bmpi)); 1043d24d4204SJose E. Roman } else *B = Bmpi; 1044d24d4204SJose E. Roman PetscFunctionReturn(0); 1045d24d4204SJose E. Roman } 1046d24d4204SJose E. Roman 1047d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1048d24d4204SJose E. Roman { 1049d24d4204SJose E. Roman Mat mat_scal; 1050d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1051d24d4204SJose E. Roman const PetscInt *cols; 1052d24d4204SJose E. Roman const PetscScalar *vals; 1053d24d4204SJose E. Roman 1054d24d4204SJose E. Roman PetscFunctionBegin; 1055d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1056d24d4204SJose E. Roman mat_scal = *newmat; 10575f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroEntries(mat_scal)); 1058d24d4204SJose E. Roman } else { 10595f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1060d24d4204SJose E. Roman m = PETSC_DECIDE; 10615f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1062d24d4204SJose E. Roman n = PETSC_DECIDE; 10635f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 10645f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(mat_scal,m,n,M,N)); 10655f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(mat_scal,MATSCALAPACK)); 10665f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(mat_scal)); 1067d24d4204SJose E. Roman } 1068d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 10695f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals)); 10705f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES)); 10715f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1072d24d4204SJose E. Roman } 10735f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 10745f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1075d24d4204SJose E. Roman 10765f80ce2aSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal)); 1077d24d4204SJose E. Roman else *newmat = mat_scal; 1078d24d4204SJose E. Roman PetscFunctionReturn(0); 1079d24d4204SJose E. Roman } 1080d24d4204SJose E. Roman 1081d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1082d24d4204SJose E. Roman { 1083d24d4204SJose E. Roman Mat mat_scal; 1084d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1085d24d4204SJose E. Roman const PetscInt *cols; 1086d24d4204SJose E. Roman const PetscScalar *vals; 1087d24d4204SJose E. Roman PetscScalar v; 1088d24d4204SJose E. Roman 1089d24d4204SJose E. Roman PetscFunctionBegin; 1090d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1091d24d4204SJose E. Roman mat_scal = *newmat; 10925f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroEntries(mat_scal)); 1093d24d4204SJose E. Roman } else { 10945f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1095d24d4204SJose E. Roman m = PETSC_DECIDE; 10965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1097d24d4204SJose E. Roman n = PETSC_DECIDE; 10985f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 10995f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(mat_scal,m,n,M,N)); 11005f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(mat_scal,MATSCALAPACK)); 11015f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(mat_scal)); 1102d24d4204SJose E. Roman } 11035f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetRowUpperTriangular(A)); 1104d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 11055f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals)); 11065f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES)); 1107d24d4204SJose E. Roman for (j=0;j<ncols;j++) { /* lower triangular part */ 1108d24d4204SJose E. Roman if (cols[j] == row) continue; 1109d24d4204SJose E. Roman v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 11105f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES)); 1111d24d4204SJose E. Roman } 11125f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1113d24d4204SJose E. Roman } 11145f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreRowUpperTriangular(A)); 11155f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 11165f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1117d24d4204SJose E. Roman 11185f80ce2aSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal)); 1119d24d4204SJose E. Roman else *newmat = mat_scal; 1120d24d4204SJose E. Roman PetscFunctionReturn(0); 1121d24d4204SJose E. Roman } 1122d24d4204SJose E. Roman 1123d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1124d24d4204SJose E. Roman { 1125d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1126d24d4204SJose E. Roman PetscInt sz=0; 1127d24d4204SJose E. Roman 1128d24d4204SJose E. Roman PetscFunctionBegin; 11295f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutSetUp(A->rmap)); 11305f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutSetUp(A->cmap)); 1131d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1132d24d4204SJose E. Roman 11335f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(a->loc)); 11345f80ce2aSJacob Faibussowitsch CHKERRQ(PetscIntMultError(a->lld,a->locc,&sz)); 11355f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(sz,&a->loc)); 11365f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar))); 1137d24d4204SJose E. Roman 1138d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1139d24d4204SJose E. Roman PetscFunctionReturn(0); 1140d24d4204SJose E. Roman } 1141d24d4204SJose E. Roman 1142d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1143d24d4204SJose E. Roman { 1144d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1145d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1146d24d4204SJose E. Roman PetscBool flg; 1147d24d4204SJose E. Roman MPI_Comm icomm; 1148d24d4204SJose E. Roman 1149d24d4204SJose E. Roman PetscFunctionBegin; 11505f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashDestroy_Private(&A->stash)); 11515f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(a->loc)); 11525f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(a->pivots)); 11535f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 11545f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1155d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1156d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1157d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1158d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 11595f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(grid)); 11605f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval)); 1161d24d4204SJose E. Roman } 11625f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDestroy(&icomm)); 11635f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL)); 11645f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 11655f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL)); 11665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL)); 11675f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(A->data)); 1168d24d4204SJose E. Roman PetscFunctionReturn(0); 1169d24d4204SJose E. Roman } 1170d24d4204SJose E. Roman 11719fbee547SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1172d24d4204SJose E. Roman { 1173d24d4204SJose E. Roman const PetscInt *ranges; 1174d24d4204SJose E. Roman PetscMPIInt size; 1175d24d4204SJose E. Roman PetscInt i,n; 1176d24d4204SJose E. Roman 1177d24d4204SJose E. Roman PetscFunctionBegin; 11785f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(map->comm,&size)); 1179d24d4204SJose E. Roman if (size>2) { 11805f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutGetRanges(map,&ranges)); 1181d24d4204SJose E. Roman n = ranges[1]-ranges[0]; 1182d24d4204SJose E. Roman for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 11832c71b3e2SJacob Faibussowitsch PetscCheckFalse(i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1184d24d4204SJose E. Roman } 1185d24d4204SJose E. Roman PetscFunctionReturn(0); 1186d24d4204SJose E. Roman } 1187d24d4204SJose E. Roman 1188d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1189d24d4204SJose E. Roman { 1190d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1191d24d4204SJose E. Roman PetscBLASInt info=0; 1192d24d4204SJose E. Roman 1193d24d4204SJose E. Roman PetscFunctionBegin; 11945f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutSetUp(A->rmap)); 11955f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLayoutSetUp(A->cmap)); 1196d24d4204SJose E. Roman 1197d24d4204SJose E. Roman /* check that the layout is as enforced by MatCreateScaLAPACK */ 11985f80ce2aSJacob Faibussowitsch CHKERRQ(MatScaLAPACKCheckLayout(A->rmap)); 11995f80ce2aSJacob Faibussowitsch CHKERRQ(MatScaLAPACKCheckLayout(A->cmap)); 1200d24d4204SJose E. Roman 1201d24d4204SJose E. Roman /* compute local sizes */ 12025f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(A->rmap->N,&a->M)); 12035f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(A->cmap->N,&a->N)); 1204d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1205d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1206d24d4204SJose E. Roman a->lld = PetscMax(1,a->locr); 1207d24d4204SJose E. Roman 1208d24d4204SJose E. Roman /* allocate local array */ 12095f80ce2aSJacob Faibussowitsch CHKERRQ(MatScaLAPACKSetPreallocation(A)); 1210d24d4204SJose E. Roman 1211d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1212d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1213d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1214d24d4204SJose E. Roman PetscFunctionReturn(0); 1215d24d4204SJose E. Roman } 1216d24d4204SJose E. Roman 1217d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1218d24d4204SJose E. Roman { 1219d24d4204SJose E. Roman PetscInt nstash,reallocs; 1220d24d4204SJose E. Roman 1221d24d4204SJose E. Roman PetscFunctionBegin; 1222d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 12235f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashScatterBegin_Private(A,&A->stash,NULL)); 12245f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs)); 12255f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 1226d24d4204SJose E. Roman PetscFunctionReturn(0); 1227d24d4204SJose E. Roman } 1228d24d4204SJose E. Roman 1229d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1230d24d4204SJose E. Roman { 1231d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1232d24d4204SJose E. Roman PetscMPIInt n; 1233d24d4204SJose E. Roman PetscInt i,flg,*row,*col; 1234d24d4204SJose E. Roman PetscScalar *val; 1235d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1236d24d4204SJose E. Roman 1237d24d4204SJose E. Roman PetscFunctionBegin; 1238d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1239d24d4204SJose E. Roman while (1) { 12405f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg)); 1241d24d4204SJose E. Roman if (!flg) break; 1242d24d4204SJose E. Roman for (i=0;i<n;i++) { 12435f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(row[i]+1,&gridx)); 12445f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(col[i]+1,&gcidx)); 1245d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 12462c71b3e2SJacob Faibussowitsch PetscCheckFalse(rsrc!=a->grid->myrow || csrc!=a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1247d24d4204SJose E. Roman switch (A->insertmode) { 1248d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1249d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 125098921bdaSJacob Faibussowitsch default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1251d24d4204SJose E. Roman } 1252d24d4204SJose E. Roman } 1253d24d4204SJose E. Roman } 12545f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashScatterEnd_Private(&A->stash)); 1255d24d4204SJose E. Roman PetscFunctionReturn(0); 1256d24d4204SJose E. Roman } 1257d24d4204SJose E. Roman 1258d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1259d24d4204SJose E. Roman { 1260d24d4204SJose E. Roman Mat Adense,As; 1261d24d4204SJose E. Roman MPI_Comm comm; 1262d24d4204SJose E. Roman 1263d24d4204SJose E. Roman PetscFunctionBegin; 12645f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetComm((PetscObject)newMat,&comm)); 12655f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,&Adense)); 12665f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(Adense,MATDENSE)); 12675f80ce2aSJacob Faibussowitsch CHKERRQ(MatLoad(Adense,viewer)); 12685f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As)); 12695f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Adense)); 12705f80ce2aSJacob Faibussowitsch CHKERRQ(MatHeaderReplace(newMat,&As)); 1271d24d4204SJose E. Roman PetscFunctionReturn(0); 1272d24d4204SJose E. Roman } 1273d24d4204SJose E. Roman 1274d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 1275d24d4204SJose E. Roman static struct _MatOps MatOps_Values = { 1276d24d4204SJose E. Roman MatSetValues_ScaLAPACK, 1277d24d4204SJose E. Roman 0, 1278d24d4204SJose E. Roman 0, 1279d24d4204SJose E. Roman MatMult_ScaLAPACK, 1280d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1281d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1282d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1283d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1284d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1285d24d4204SJose E. Roman 0, 1286d24d4204SJose E. Roman /*10*/ 0, 1287d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1288d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1289d24d4204SJose E. Roman 0, 1290d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1291d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1292d24d4204SJose E. Roman 0, 1293d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1294d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1295d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1296d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1297d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1298d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1299d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1300d24d4204SJose E. Roman /*24*/ 0, 1301d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1302d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1303d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1304d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1305d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1306d24d4204SJose E. Roman 0, 1307d24d4204SJose E. Roman 0, 1308d24d4204SJose E. Roman 0, 1309d24d4204SJose E. Roman 0, 1310d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1311d24d4204SJose E. Roman 0, 1312d24d4204SJose E. Roman 0, 1313d24d4204SJose E. Roman 0, 1314d24d4204SJose E. Roman 0, 1315d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1316d24d4204SJose E. Roman 0, 1317d24d4204SJose E. Roman 0, 1318d24d4204SJose E. Roman 0, 1319d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1320d24d4204SJose E. Roman /*44*/ 0, 1321d24d4204SJose E. Roman MatScale_ScaLAPACK, 1322d24d4204SJose E. Roman MatShift_ScaLAPACK, 1323d24d4204SJose E. Roman 0, 1324d24d4204SJose E. Roman 0, 1325d24d4204SJose E. Roman /*49*/ 0, 1326d24d4204SJose E. Roman 0, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman 0, 1329d24d4204SJose E. Roman 0, 1330d24d4204SJose E. Roman /*54*/ 0, 1331d24d4204SJose E. Roman 0, 1332d24d4204SJose E. Roman 0, 1333d24d4204SJose E. Roman 0, 1334d24d4204SJose E. Roman 0, 1335d24d4204SJose E. Roman /*59*/ 0, 1336d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1337d24d4204SJose E. Roman MatView_ScaLAPACK, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman 0, 1340d24d4204SJose E. Roman /*64*/ 0, 1341d24d4204SJose E. Roman 0, 1342d24d4204SJose E. Roman 0, 1343d24d4204SJose E. Roman 0, 1344d24d4204SJose E. Roman 0, 1345d24d4204SJose E. Roman /*69*/ 0, 1346d24d4204SJose E. Roman 0, 1347d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1348d24d4204SJose E. Roman 0, 1349d24d4204SJose E. Roman 0, 1350d24d4204SJose E. Roman /*74*/ 0, 1351d24d4204SJose E. Roman 0, 1352d24d4204SJose E. Roman 0, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman 0, 1355d24d4204SJose E. Roman /*79*/ 0, 1356d24d4204SJose E. Roman 0, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1360d24d4204SJose E. Roman /*84*/ 0, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman 0, 1365d24d4204SJose E. Roman /*89*/ 0, 1366d24d4204SJose E. Roman 0, 1367d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1368d24d4204SJose E. Roman 0, 1369d24d4204SJose E. Roman 0, 1370d24d4204SJose E. Roman /*94*/ 0, 1371d24d4204SJose E. Roman 0, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1374d24d4204SJose E. Roman 0, 1375d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1376d24d4204SJose E. Roman 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1379d24d4204SJose E. Roman 0, 1380d24d4204SJose E. Roman /*104*/0, 1381d24d4204SJose E. Roman 0, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman 0, 1385d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK, 1386d24d4204SJose E. Roman 0, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1390d24d4204SJose E. Roman /*114*/0, 1391d24d4204SJose E. Roman 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman 0, 1395d24d4204SJose E. Roman /*119*/0, 1396d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1397d24d4204SJose E. Roman 0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman 0, 1400d24d4204SJose E. Roman /*124*/0, 1401d24d4204SJose E. Roman 0, 1402d24d4204SJose E. Roman 0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman 0, 1405d24d4204SJose E. Roman /*129*/0, 1406d24d4204SJose E. Roman 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman 0, 1409d24d4204SJose E. Roman 0, 1410d24d4204SJose E. Roman /*134*/0, 1411d24d4204SJose E. Roman 0, 1412d24d4204SJose E. Roman 0, 1413d24d4204SJose E. Roman 0, 1414d24d4204SJose E. Roman 0, 1415d24d4204SJose E. Roman 0, 1416d24d4204SJose E. Roman /*140*/0, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman 0, 1420d24d4204SJose E. Roman 0, 1421d24d4204SJose E. Roman /*145*/0, 1422d24d4204SJose E. Roman 0, 1423d24d4204SJose E. Roman 0 1424d24d4204SJose E. Roman }; 1425d24d4204SJose E. Roman 1426d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1427d24d4204SJose E. Roman { 1428d24d4204SJose E. Roman PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1429d24d4204SJose E. Roman PetscInt size=stash->size,nsends; 1430d24d4204SJose E. Roman PetscInt count,*sindices,**rindices,i,j,l; 1431d24d4204SJose E. Roman PetscScalar **rvalues,*svalues; 1432d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1433d24d4204SJose E. Roman MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1434d24d4204SJose E. Roman PetscMPIInt *sizes,*nlengths,nreceives; 1435d24d4204SJose E. Roman PetscInt *sp_idx,*sp_idy; 1436d24d4204SJose E. Roman PetscScalar *sp_val; 1437d24d4204SJose E. Roman PetscMatStashSpace space,space_next; 1438d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1439d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1440d24d4204SJose E. Roman 1441d24d4204SJose E. Roman PetscFunctionBegin; 1442d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1443d24d4204SJose E. Roman InsertMode addv; 14445f80ce2aSJacob Faibussowitsch CHKERRMPI(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 14452c71b3e2SJacob Faibussowitsch PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1446d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1447d24d4204SJose E. Roman } 1448d24d4204SJose E. Roman 1449d24d4204SJose E. Roman bs2 = stash->bs*stash->bs; 1450d24d4204SJose E. Roman 1451d24d4204SJose E. Roman /* first count number of contributors to each processor */ 14525f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(size,&nlengths)); 14535f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(stash->n+1,&owner)); 1454d24d4204SJose E. Roman 1455d24d4204SJose E. Roman i = j = 0; 1456d24d4204SJose E. Roman space = stash->space_head; 1457d24d4204SJose E. Roman while (space) { 1458d24d4204SJose E. Roman space_next = space->next; 1459d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 14605f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(space->idx[l]+1,&gridx)); 14615f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(space->idy[l]+1,&gcidx)); 1462d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1463d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1464d24d4204SJose E. Roman nlengths[j]++; owner[i] = j; 1465d24d4204SJose E. Roman i++; 1466d24d4204SJose E. Roman } 1467d24d4204SJose E. Roman space = space_next; 1468d24d4204SJose E. Roman } 1469d24d4204SJose E. Roman 1470d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 14715f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(size,&sizes)); 1472d24d4204SJose E. Roman for (i=0, nsends=0; i<size; i++) { 1473d24d4204SJose E. Roman if (nlengths[i]) { 1474d24d4204SJose E. Roman sizes[i] = 1; nsends++; 1475d24d4204SJose E. Roman } 1476d24d4204SJose E. Roman } 1477d24d4204SJose E. Roman 1478d24d4204SJose E. Roman {PetscMPIInt *onodes,*olengths; 1479d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 14805f80ce2aSJacob Faibussowitsch CHKERRQ(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives)); 14815f80ce2aSJacob Faibussowitsch CHKERRQ(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths)); 1482d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1483d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] *=2; 14845f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1)); 1485d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1486d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 14875f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2)); 14885f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(onodes)); 14895f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(olengths));} 1490d24d4204SJose E. Roman 1491d24d4204SJose E. Roman /* do sends: 1492d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1493d24d4204SJose E. Roman the ith processor 1494d24d4204SJose E. Roman */ 14955f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices)); 14965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(2*nsends,&send_waits)); 14975f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(size,&startv,size,&starti)); 1498d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 1499d24d4204SJose E. Roman startv[0] = 0; starti[0] = 0; 1500d24d4204SJose E. Roman for (i=1; i<size; i++) { 1501d24d4204SJose E. Roman startv[i] = startv[i-1] + nlengths[i-1]; 1502d24d4204SJose E. Roman starti[i] = starti[i-1] + 2*nlengths[i-1]; 1503d24d4204SJose E. Roman } 1504d24d4204SJose E. Roman 1505d24d4204SJose E. Roman i = 0; 1506d24d4204SJose E. Roman space = stash->space_head; 1507d24d4204SJose E. Roman while (space) { 1508d24d4204SJose E. Roman space_next = space->next; 1509d24d4204SJose E. Roman sp_idx = space->idx; 1510d24d4204SJose E. Roman sp_idy = space->idy; 1511d24d4204SJose E. Roman sp_val = space->val; 1512d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1513d24d4204SJose E. Roman j = owner[i]; 1514d24d4204SJose E. Roman if (bs2 == 1) { 1515d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1516d24d4204SJose E. Roman } else { 1517d24d4204SJose E. Roman PetscInt k; 1518d24d4204SJose E. Roman PetscScalar *buf1,*buf2; 1519d24d4204SJose E. Roman buf1 = svalues+bs2*startv[j]; 1520d24d4204SJose E. Roman buf2 = space->val + bs2*l; 1521d24d4204SJose E. Roman for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1522d24d4204SJose E. Roman } 1523d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1524d24d4204SJose E. Roman sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1525d24d4204SJose E. Roman startv[j]++; 1526d24d4204SJose E. Roman starti[j]++; 1527d24d4204SJose E. Roman i++; 1528d24d4204SJose E. Roman } 1529d24d4204SJose E. Roman space = space_next; 1530d24d4204SJose E. Roman } 1531d24d4204SJose E. Roman startv[0] = 0; 1532d24d4204SJose E. Roman for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1533d24d4204SJose E. Roman 1534d24d4204SJose E. Roman for (i=0,count=0; i<size; i++) { 1535d24d4204SJose E. Roman if (sizes[i]) { 15365f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++)); 15375f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++)); 1538d24d4204SJose E. Roman } 1539d24d4204SJose E. Roman } 1540d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 15415f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends)); 1542d24d4204SJose E. Roman for (i=0; i<size; i++) { 1543d24d4204SJose E. Roman if (sizes[i]) { 15445f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))))); 1545d24d4204SJose E. Roman } 1546d24d4204SJose E. Roman } 1547d24d4204SJose E. Roman #endif 15485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(nlengths)); 15495f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(owner)); 15505f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(startv,starti)); 15515f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sizes)); 1552d24d4204SJose E. Roman 1553d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 15545f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(2*nreceives,&recv_waits)); 1555d24d4204SJose E. Roman 1556d24d4204SJose E. Roman for (i=0; i<nreceives; i++) { 1557d24d4204SJose E. Roman recv_waits[2*i] = recv_waits1[i]; 1558d24d4204SJose E. Roman recv_waits[2*i+1] = recv_waits2[i]; 1559d24d4204SJose E. Roman } 1560d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1561d24d4204SJose E. Roman 15625f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(recv_waits1)); 15635f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(recv_waits2)); 1564d24d4204SJose E. Roman 1565d24d4204SJose E. Roman stash->svalues = svalues; 1566d24d4204SJose E. Roman stash->sindices = sindices; 1567d24d4204SJose E. Roman stash->rvalues = rvalues; 1568d24d4204SJose E. Roman stash->rindices = rindices; 1569d24d4204SJose E. Roman stash->send_waits = send_waits; 1570d24d4204SJose E. Roman stash->nsends = nsends; 1571d24d4204SJose E. Roman stash->nrecvs = nreceives; 1572d24d4204SJose E. Roman stash->reproduce_count = 0; 1573d24d4204SJose E. Roman PetscFunctionReturn(0); 1574d24d4204SJose E. Roman } 1575d24d4204SJose E. Roman 1576d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1577d24d4204SJose E. Roman { 1578d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1579d24d4204SJose E. Roman 1580d24d4204SJose E. Roman PetscFunctionBegin; 1581*28b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 15822c71b3e2SJacob Faibussowitsch PetscCheckFalse(mb<1 && mb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb); 15832c71b3e2SJacob Faibussowitsch PetscCheckFalse(nb<1 && nb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb); 15845f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 15855f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1586d24d4204SJose E. Roman PetscFunctionReturn(0); 1587d24d4204SJose E. Roman } 1588d24d4204SJose E. Roman 1589d24d4204SJose E. Roman /*@ 1590d24d4204SJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1591d24d4204SJose E. Roman the ScaLAPACK matrix 1592d24d4204SJose E. Roman 1593d24d4204SJose E. Roman Logically Collective on A 1594d24d4204SJose E. Roman 1595d8d19677SJose E. Roman Input Parameters: 1596d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1597d24d4204SJose E. Roman . mb - the row block size 1598d24d4204SJose E. Roman - nb - the column block size 1599d24d4204SJose E. Roman 1600d24d4204SJose E. Roman Level: intermediate 1601d24d4204SJose E. Roman 1602d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1603d24d4204SJose E. Roman @*/ 1604d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1605d24d4204SJose E. Roman { 1606d24d4204SJose E. Roman PetscFunctionBegin; 1607d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1608d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,mb,2); 1609d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,nb,3); 16105f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb))); 1611d24d4204SJose E. Roman PetscFunctionReturn(0); 1612d24d4204SJose E. Roman } 1613d24d4204SJose E. Roman 1614d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1615d24d4204SJose E. Roman { 1616d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1617d24d4204SJose E. Roman 1618d24d4204SJose E. Roman PetscFunctionBegin; 1619d24d4204SJose E. Roman if (mb) *mb = a->mb; 1620d24d4204SJose E. Roman if (nb) *nb = a->nb; 1621d24d4204SJose E. Roman PetscFunctionReturn(0); 1622d24d4204SJose E. Roman } 1623d24d4204SJose E. Roman 1624d24d4204SJose E. Roman /*@ 1625d24d4204SJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1626d24d4204SJose E. Roman the ScaLAPACK matrix 1627d24d4204SJose E. Roman 1628d24d4204SJose E. Roman Not collective 1629d24d4204SJose E. Roman 1630d24d4204SJose E. Roman Input Parameter: 1631d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1632d24d4204SJose E. Roman 1633d24d4204SJose E. Roman Output Parameters: 1634d24d4204SJose E. Roman + mb - the row block size 1635d24d4204SJose E. Roman - nb - the column block size 1636d24d4204SJose E. Roman 1637d24d4204SJose E. Roman Level: intermediate 1638d24d4204SJose E. Roman 1639d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1640d24d4204SJose E. Roman @*/ 1641d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1642d24d4204SJose E. Roman { 1643d24d4204SJose E. Roman PetscFunctionBegin; 1644d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 16455f80ce2aSJacob Faibussowitsch CHKERRQ(PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb))); 1646d24d4204SJose E. Roman PetscFunctionReturn(0); 1647d24d4204SJose E. Roman } 1648d24d4204SJose E. Roman 1649d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1650d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1651d24d4204SJose E. Roman 1652d24d4204SJose E. Roman /*MC 1653d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1654d24d4204SJose E. Roman 1655d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1656d24d4204SJose E. Roman 1657d24d4204SJose E. Roman Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1658d24d4204SJose E. Roman 1659d24d4204SJose E. Roman Options Database Keys: 1660d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1661d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1662d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1663d24d4204SJose E. Roman 1664d24d4204SJose E. Roman Level: beginner 1665d24d4204SJose E. Roman 1666d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL 1667d24d4204SJose E. Roman M*/ 1668d24d4204SJose E. Roman 1669d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1670d24d4204SJose E. Roman { 1671d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1672d24d4204SJose E. Roman PetscErrorCode ierr; 1673d24d4204SJose E. Roman PetscBool flg,flg1; 1674d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1675d24d4204SJose E. Roman MPI_Comm icomm; 1676d24d4204SJose E. Roman PetscBLASInt nprow,npcol,myrow,mycol; 1677d24d4204SJose E. Roman PetscInt optv1,k=2,array[2]={0,0}; 1678d24d4204SJose E. Roman PetscMPIInt size; 1679d24d4204SJose E. Roman 1680d24d4204SJose E. Roman PetscFunctionBegin; 16815f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps))); 1682d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1683d24d4204SJose E. Roman 16845f80ce2aSJacob Faibussowitsch CHKERRQ(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash)); 1685d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1686d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1687d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1688d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1689d24d4204SJose E. Roman 16905f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNewLog(A,&a)); 1691d24d4204SJose E. Roman A->data = (void*)a; 1692d24d4204SJose E. Roman 1693d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1694d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 16955f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0)); 16965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 16975f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite)); 1698d24d4204SJose E. Roman } 16995f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 17005f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1701d24d4204SJose E. Roman if (!flg) { 17025f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNewLog(A,&grid)); 1703d24d4204SJose E. Roman 17045f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(icomm,&size)); 1705d24d4204SJose E. Roman grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1706d24d4204SJose E. Roman 1707d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 17085f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1)); 1709d24d4204SJose E. Roman if (flg1) { 17102c71b3e2SJacob Faibussowitsch PetscCheckFalse(size % optv1,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size); 1711d24d4204SJose E. Roman grid->nprow = optv1; 1712d24d4204SJose E. Roman } 1713d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1714d24d4204SJose E. Roman 1715d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1716d24d4204SJose E. Roman grid->npcol = size/grid->nprow; 17175f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(grid->nprow,&nprow)); 17185f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(grid->npcol,&npcol)); 1719f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1720d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1721d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1722d24d4204SJose E. Roman grid->grid_refct = 1; 1723d24d4204SJose E. Roman grid->nprow = nprow; 1724d24d4204SJose E. Roman grid->npcol = npcol; 1725d24d4204SJose E. Roman grid->myrow = myrow; 1726d24d4204SJose E. Roman grid->mycol = mycol; 1727d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1728f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1729d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1730f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1731d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol,"R",size,1); 17325f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid)); 1733d24d4204SJose E. Roman 1734d24d4204SJose E. Roman } else grid->grid_refct++; 17355f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDestroy(&icomm)); 1736d24d4204SJose E. Roman a->grid = grid; 1737d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1738d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1739d24d4204SJose E. Roman 1740d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 17415f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg)); 1742d24d4204SJose E. Roman if (flg) { 1743d24d4204SJose E. Roman a->mb = array[0]; 1744d24d4204SJose E. Roman a->nb = (k>1)? array[1]: a->mb; 1745d24d4204SJose E. Roman } 1746d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1747d24d4204SJose E. Roman 17485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK)); 17495f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK)); 17505f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK)); 17515f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK)); 1752d24d4204SJose E. Roman PetscFunctionReturn(0); 1753d24d4204SJose E. Roman } 1754d24d4204SJose E. Roman 1755d24d4204SJose E. Roman /*@C 1756d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1757d24d4204SJose E. Roman (2D block cyclic distribution). 1758d24d4204SJose E. Roman 1759d24d4204SJose E. Roman Collective 1760d24d4204SJose E. Roman 1761d24d4204SJose E. Roman Input Parameters: 1762d24d4204SJose E. Roman + comm - MPI communicator 1763d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1764d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1765d24d4204SJose E. Roman . M - number of global rows 1766d24d4204SJose E. Roman . N - number of global columns 1767d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1768d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1769d24d4204SJose E. Roman 1770d24d4204SJose E. Roman Output Parameter: 1771d24d4204SJose E. Roman . A - the matrix 1772d24d4204SJose E. Roman 1773d24d4204SJose E. Roman Options Database Keys: 1774d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1775d24d4204SJose E. Roman 1776d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1777d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1778d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1779d24d4204SJose E. Roman 1780d24d4204SJose E. Roman Notes: 1781d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1782d24d4204SJose E. Roman is chosen. 1783d24d4204SJose E. Roman 1784d24d4204SJose E. Roman Storage Information: 1785d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1786d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1787d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1788d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1789d24d4204SJose E. Roman 1790d24d4204SJose E. Roman Level: intermediate 1791d24d4204SJose E. Roman 1792d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1793d24d4204SJose E. Roman @*/ 1794d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1795d24d4204SJose E. Roman { 1796d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1797d24d4204SJose E. Roman PetscInt m,n; 1798d24d4204SJose E. Roman 1799d24d4204SJose E. Roman PetscFunctionBegin; 18005f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,A)); 18015f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(*A,MATSCALAPACK)); 18022c71b3e2SJacob Faibussowitsch PetscCheckFalse(M==PETSC_DECIDE || N==PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1803d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1804d24d4204SJose E. Roman m = PETSC_DECIDE; 18055f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 1806d24d4204SJose E. Roman n = PETSC_DECIDE; 18075f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 18085f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(*A,m,n,M,N)); 1809d24d4204SJose E. Roman a = (Mat_ScaLAPACK*)(*A)->data; 18105f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(M,&a->M)); 18115f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(N,&a->N)); 18125f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 18135f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 18145f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(rsrc,&a->rsrc)); 18155f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(csrc,&a->csrc)); 18165f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(*A)); 1817d24d4204SJose E. Roman PetscFunctionReturn(0); 1818d24d4204SJose E. Roman } 1819