1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n" 827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n" 927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n" 1027e75052SPierre Jolivet " YEAR = 1997\n" 1127e75052SPierre Jolivet "}\n"; 1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE; 1327e75052SPierre Jolivet 14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 15d24d4204SJose E. Roman 16d24d4204SJose E. Roman /* 17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19d24d4204SJose E. Roman */ 20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21d24d4204SJose E. Roman 22f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23f7ec113fSDamian Marek { 24f7ec113fSDamian Marek PetscErrorCode ierr; 25f7ec113fSDamian Marek 26f7ec113fSDamian Marek PetscFunctionBegin; 27f7ec113fSDamian Marek ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr); 2855b25c41SPierre Jolivet ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr); 29f7ec113fSDamian Marek PetscFunctionReturn(0); 30f7ec113fSDamian Marek } 31f7ec113fSDamian Marek 32d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 33d24d4204SJose E. Roman { 34d24d4204SJose E. Roman PetscErrorCode ierr; 35d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 36d24d4204SJose E. Roman PetscBool iascii; 37d24d4204SJose E. Roman PetscViewerFormat format; 38d24d4204SJose E. Roman Mat Adense; 39d24d4204SJose E. Roman 40d24d4204SJose E. Roman PetscFunctionBegin; 41d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 42d24d4204SJose E. Roman if (iascii) { 43d24d4204SJose E. Roman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 44d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 45d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 46d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 47d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 48d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 49d24d4204SJose E. Roman PetscFunctionReturn(0); 50d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 51d24d4204SJose E. Roman PetscFunctionReturn(0); 52d24d4204SJose E. Roman } 53d24d4204SJose E. Roman } 54d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 55d24d4204SJose E. Roman ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 56d24d4204SJose E. Roman ierr = MatView(Adense,viewer);CHKERRQ(ierr); 57d24d4204SJose E. Roman ierr = MatDestroy(&Adense);CHKERRQ(ierr); 58d24d4204SJose E. Roman PetscFunctionReturn(0); 59d24d4204SJose E. Roman } 60d24d4204SJose E. Roman 61d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 62d24d4204SJose E. Roman { 63d24d4204SJose E. Roman PetscErrorCode ierr; 64d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 65d24d4204SJose E. Roman PetscLogDouble isend[2],irecv[2]; 66d24d4204SJose E. Roman 67d24d4204SJose E. Roman PetscFunctionBegin; 68d24d4204SJose E. Roman info->block_size = 1.0; 69d24d4204SJose E. Roman 70d24d4204SJose E. Roman isend[0] = a->lld*a->locc; /* locally allocated */ 71d24d4204SJose E. Roman isend[1] = a->locr*a->locc; /* used submatrix */ 72d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 73d24d4204SJose E. Roman info->nz_allocated = isend[0]; 74d24d4204SJose E. Roman info->nz_used = isend[1]; 75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 76820f2d46SBarry Smith ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 77d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 78d24d4204SJose E. Roman info->nz_used = irecv[1]; 79d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 80820f2d46SBarry Smith ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 81d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 82d24d4204SJose E. Roman info->nz_used = irecv[1]; 83d24d4204SJose E. Roman } 84d24d4204SJose E. Roman 85d24d4204SJose E. Roman info->nz_unneeded = 0; 86d24d4204SJose E. Roman info->assemblies = A->num_ass; 87d24d4204SJose E. Roman info->mallocs = 0; 88d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 89d24d4204SJose E. Roman info->fill_ratio_given = 0; 90d24d4204SJose E. Roman info->fill_ratio_needed = 0; 91d24d4204SJose E. Roman info->factor_mallocs = 0; 92d24d4204SJose E. Roman PetscFunctionReturn(0); 93d24d4204SJose E. Roman } 94d24d4204SJose E. Roman 95d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 96d24d4204SJose E. Roman { 97d24d4204SJose E. Roman PetscFunctionBegin; 98d24d4204SJose E. Roman switch (op) { 99d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 100d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 101d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 102d24d4204SJose E. Roman case MAT_SYMMETRIC: 103d24d4204SJose E. Roman case MAT_SORTED_FULL: 104d24d4204SJose E. Roman case MAT_HERMITIAN: 105d24d4204SJose E. Roman break; 106d24d4204SJose E. Roman default: 107d24d4204SJose E. Roman SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 108d24d4204SJose E. Roman } 109d24d4204SJose E. Roman PetscFunctionReturn(0); 110d24d4204SJose E. Roman } 111d24d4204SJose E. Roman 112d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 113d24d4204SJose E. Roman { 114d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 115d24d4204SJose E. Roman PetscErrorCode ierr; 116d24d4204SJose E. Roman PetscInt i,j; 117d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 118d24d4204SJose E. Roman 119d24d4204SJose E. Roman PetscFunctionBegin; 120d24d4204SJose E. Roman for (i=0;i<nr;i++) { 121d24d4204SJose E. Roman if (rows[i] < 0) continue; 122d24d4204SJose E. Roman ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 123d24d4204SJose E. Roman for (j=0;j<nc;j++) { 124d24d4204SJose E. Roman if (cols[j] < 0) continue; 125d24d4204SJose E. Roman ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 126d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 127d24d4204SJose E. Roman if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 128d24d4204SJose E. Roman switch (imode) { 129d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 130d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 131d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 132d24d4204SJose E. Roman } 133d24d4204SJose E. Roman } else { 134d24d4204SJose E. Roman if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 135d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 136d24d4204SJose E. Roman ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 137d24d4204SJose E. Roman } 138d24d4204SJose E. Roman } 139d24d4204SJose E. Roman } 140d24d4204SJose E. Roman PetscFunctionReturn(0); 141d24d4204SJose E. Roman } 142d24d4204SJose E. Roman 143d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 144d24d4204SJose E. Roman { 145d24d4204SJose E. Roman PetscErrorCode ierr; 146d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 147d24d4204SJose E. Roman PetscScalar *x2d,*y2d,alpha=1.0; 148d24d4204SJose E. Roman const PetscInt *ranges; 149d24d4204SJose E. Roman PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 150d24d4204SJose E. Roman 151d24d4204SJose E. Roman PetscFunctionBegin; 152d24d4204SJose E. Roman if (transpose) { 153d24d4204SJose E. Roman 154d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 155d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 156d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 157d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 158d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 159d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 160d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 161d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 162d24d4204SJose E. Roman ylld = 1; 163d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 164d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman /* allocate 2d vectors */ 167d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 168d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 169d24d4204SJose E. Roman ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 170d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 171d24d4204SJose E. Roman 172d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 173d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 174d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 175d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 176d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 177d24d4204SJose E. Roman 178d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 179d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 180d24d4204SJose E. Roman 181d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 182d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 183d24d4204SJose E. Roman 184d24d4204SJose E. Roman /* call PBLAS subroutine */ 185d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 186d24d4204SJose E. Roman 187d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 188d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 189d24d4204SJose E. Roman 190d24d4204SJose E. Roman } else { /* non-transpose */ 191d24d4204SJose E. Roman 192d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 193d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 194d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 195d24d4204SJose E. Roman xlld = 1; 196d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 197d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 198d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 199d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 200d24d4204SJose E. Roman ylld = PetscMax(1,A->rmap->n); 201d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 202d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 203d24d4204SJose E. Roman 204d24d4204SJose E. Roman /* allocate 2d vectors */ 205d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 206d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 207d24d4204SJose E. Roman ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 208d24d4204SJose E. Roman ylld = PetscMax(1,lszy); 209d24d4204SJose E. Roman 210d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 211d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 212d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 213d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 214d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 215d24d4204SJose E. Roman 216d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 217d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 218d24d4204SJose E. Roman 219d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 220d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 221d24d4204SJose E. Roman 222d24d4204SJose E. Roman /* call PBLAS subroutine */ 223d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 224d24d4204SJose E. Roman 225d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 226d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 227d24d4204SJose E. Roman 228d24d4204SJose E. Roman } 229d24d4204SJose E. Roman ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 230d24d4204SJose E. Roman PetscFunctionReturn(0); 231d24d4204SJose E. Roman } 232d24d4204SJose E. Roman 233d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 234d24d4204SJose E. Roman { 235d24d4204SJose E. Roman PetscErrorCode ierr; 236d24d4204SJose E. Roman const PetscScalar *xarray; 237d24d4204SJose E. Roman PetscScalar *yarray; 238d24d4204SJose E. Roman 239d24d4204SJose E. Roman PetscFunctionBegin; 240d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 241d24d4204SJose E. Roman ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 242d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 243d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 244d24d4204SJose E. Roman ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 245d24d4204SJose E. Roman PetscFunctionReturn(0); 246d24d4204SJose E. Roman } 247d24d4204SJose E. Roman 248d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 249d24d4204SJose E. Roman { 250d24d4204SJose E. Roman PetscErrorCode ierr; 251d24d4204SJose E. Roman const PetscScalar *xarray; 252d24d4204SJose E. Roman PetscScalar *yarray; 253d24d4204SJose E. Roman 254d24d4204SJose E. Roman PetscFunctionBegin; 255d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 256d24d4204SJose E. Roman ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 257d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 258d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 259d24d4204SJose E. Roman ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 260d24d4204SJose E. Roman PetscFunctionReturn(0); 261d24d4204SJose E. Roman } 262d24d4204SJose E. Roman 263d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 264d24d4204SJose E. Roman { 265d24d4204SJose E. Roman PetscErrorCode ierr; 266d24d4204SJose E. Roman const PetscScalar *xarray; 267d24d4204SJose E. Roman PetscScalar *zarray; 268d24d4204SJose E. Roman 269d24d4204SJose E. Roman PetscFunctionBegin; 270d24d4204SJose E. Roman if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 271d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 272d24d4204SJose E. Roman ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 273d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 274d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 275d24d4204SJose E. Roman ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 276d24d4204SJose E. Roman PetscFunctionReturn(0); 277d24d4204SJose E. Roman } 278d24d4204SJose E. Roman 279d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 280d24d4204SJose E. Roman { 281d24d4204SJose E. Roman PetscErrorCode ierr; 282d24d4204SJose E. Roman const PetscScalar *xarray; 283d24d4204SJose E. Roman PetscScalar *zarray; 284d24d4204SJose E. Roman 285d24d4204SJose E. Roman PetscFunctionBegin; 286d24d4204SJose E. Roman if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 287d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 288d24d4204SJose E. Roman ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 289d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 290d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 291d24d4204SJose E. Roman ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 292d24d4204SJose E. Roman PetscFunctionReturn(0); 293d24d4204SJose E. Roman } 294d24d4204SJose E. Roman 295d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 296d24d4204SJose E. Roman { 297d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 298d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 299d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 300d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 301d24d4204SJose E. Roman PetscBLASInt one=1; 302d24d4204SJose E. Roman 303d24d4204SJose E. Roman PetscFunctionBegin; 304d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 305d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 306d24d4204SJose E. Roman PetscFunctionReturn(0); 307d24d4204SJose E. Roman } 308d24d4204SJose E. Roman 309d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 310d24d4204SJose E. Roman { 311d24d4204SJose E. Roman PetscErrorCode ierr; 312d24d4204SJose E. Roman 313d24d4204SJose E. Roman PetscFunctionBegin; 314d24d4204SJose E. Roman ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 315d24d4204SJose E. Roman ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 316d24d4204SJose E. Roman ierr = MatSetUp(C);CHKERRQ(ierr); 317d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 318d24d4204SJose E. Roman PetscFunctionReturn(0); 319d24d4204SJose E. Roman } 320d24d4204SJose E. Roman 321d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 322d24d4204SJose E. Roman { 323d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 324d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 325d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 326d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 327d24d4204SJose E. Roman PetscBLASInt one=1; 328d24d4204SJose E. Roman 329d24d4204SJose E. Roman PetscFunctionBegin; 330d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 331d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 332d24d4204SJose E. Roman PetscFunctionReturn(0); 333d24d4204SJose E. Roman } 334d24d4204SJose E. Roman 335d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 336d24d4204SJose E. Roman { 337d24d4204SJose E. Roman PetscErrorCode ierr; 338d24d4204SJose E. Roman 339d24d4204SJose E. Roman PetscFunctionBegin; 340d24d4204SJose E. Roman ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 341d24d4204SJose E. Roman ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 342d24d4204SJose E. Roman ierr = MatSetUp(C);CHKERRQ(ierr); 343d24d4204SJose E. Roman PetscFunctionReturn(0); 344d24d4204SJose E. Roman } 345d24d4204SJose E. Roman 346d24d4204SJose E. Roman /* --------------------------------------- */ 347d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 348d24d4204SJose E. Roman { 349d24d4204SJose E. Roman PetscFunctionBegin; 350d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 351d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 352d24d4204SJose E. Roman PetscFunctionReturn(0); 353d24d4204SJose E. Roman } 354d24d4204SJose E. Roman 355d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 356d24d4204SJose E. Roman { 357d24d4204SJose E. Roman PetscFunctionBegin; 358d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 359d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 360d24d4204SJose E. Roman PetscFunctionReturn(0); 361d24d4204SJose E. Roman } 362d24d4204SJose E. Roman 363d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 364d24d4204SJose E. Roman { 365d24d4204SJose E. Roman PetscErrorCode ierr; 366d24d4204SJose E. Roman Mat_Product *product = C->product; 367d24d4204SJose E. Roman 368d24d4204SJose E. Roman PetscFunctionBegin; 369d24d4204SJose E. Roman switch (product->type) { 370d24d4204SJose E. Roman case MATPRODUCT_AB: 371d24d4204SJose E. Roman ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 372d24d4204SJose E. Roman break; 373d24d4204SJose E. Roman case MATPRODUCT_ABt: 374d24d4204SJose E. Roman ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 375d24d4204SJose E. Roman break; 376d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 377d24d4204SJose E. Roman } 378d24d4204SJose E. Roman PetscFunctionReturn(0); 379d24d4204SJose E. Roman } 380d24d4204SJose E. Roman /* --------------------------------------- */ 381d24d4204SJose E. Roman 382d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 383d24d4204SJose E. Roman { 384d24d4204SJose E. Roman PetscErrorCode ierr; 385d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 386d24d4204SJose E. Roman PetscScalar *darray,*d2d,v; 387d24d4204SJose E. Roman const PetscInt *ranges; 388d24d4204SJose E. Roman PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 389d24d4204SJose E. Roman 390d24d4204SJose E. Roman PetscFunctionBegin; 391d24d4204SJose E. Roman ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 392d24d4204SJose E. Roman 393d24d4204SJose E. Roman if (A->rmap->N<=A->cmap->N) { /* row version */ 394d24d4204SJose E. Roman 395d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 396d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 397d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 398d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 399d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 400d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 401d24d4204SJose E. Roman 402d24d4204SJose E. Roman /* allocate 2d vector */ 403d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 404d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 405d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 406d24d4204SJose E. Roman 407d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 408d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 409d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 410d24d4204SJose E. Roman 411d24d4204SJose E. Roman /* collect diagonal */ 412d24d4204SJose E. Roman for (j=1;j<=a->M;j++) { 413d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 414d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 415d24d4204SJose E. Roman } 416d24d4204SJose E. Roman 417d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 418d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 419d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 420d24d4204SJose E. Roman 421d24d4204SJose E. Roman } else { /* column version */ 422d24d4204SJose E. Roman 423d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 424d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 425d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 426d24d4204SJose E. Roman dlld = 1; 427d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 428d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 429d24d4204SJose E. Roman 430d24d4204SJose E. Roman /* allocate 2d vector */ 431d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 432d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 433d24d4204SJose E. Roman 434d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 435d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 436d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 437d24d4204SJose E. Roman 438d24d4204SJose E. Roman /* collect diagonal */ 439d24d4204SJose E. Roman for (j=1;j<=a->N;j++) { 440d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 441d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 442d24d4204SJose E. Roman } 443d24d4204SJose E. Roman 444d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 445d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 446d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 447d24d4204SJose E. Roman } 448d24d4204SJose E. Roman 449d24d4204SJose E. Roman ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 450d24d4204SJose E. Roman ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 451d24d4204SJose E. Roman ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 452d24d4204SJose E. Roman PetscFunctionReturn(0); 453d24d4204SJose E. Roman } 454d24d4204SJose E. Roman 455d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 456d24d4204SJose E. Roman { 457d24d4204SJose E. Roman PetscErrorCode ierr; 458d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 459d24d4204SJose E. Roman const PetscScalar *d; 460d24d4204SJose E. Roman const PetscInt *ranges; 461d24d4204SJose E. Roman PetscScalar *d2d; 462d24d4204SJose E. Roman PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 463d24d4204SJose E. Roman 464d24d4204SJose E. Roman PetscFunctionBegin; 465d24d4204SJose E. Roman if (R) { 466d24d4204SJose E. Roman ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 467d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 468d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 469d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 470d24d4204SJose E. Roman dlld = 1; 471d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 472d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 473d24d4204SJose E. Roman 474d24d4204SJose E. Roman /* allocate 2d vector */ 475d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 476d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 477d24d4204SJose E. Roman 478d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 479d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 480d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 481d24d4204SJose E. Roman 482d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 483d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 484d24d4204SJose E. Roman 485d24d4204SJose E. Roman /* broadcast along process columns */ 486d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 487d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 488d24d4204SJose E. Roman 489d24d4204SJose E. Roman /* local scaling */ 490d24d4204SJose E. Roman for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 491d24d4204SJose E. Roman 492d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 493d24d4204SJose E. Roman ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 494d24d4204SJose E. Roman } 495d24d4204SJose E. Roman if (L) { 496d24d4204SJose E. Roman ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 497d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 498d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 499d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 500d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 501d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 502d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 503d24d4204SJose E. Roman 504d24d4204SJose E. Roman /* allocate 2d vector */ 505d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 506d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 507d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 508d24d4204SJose E. Roman 509d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 510d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 511d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 512d24d4204SJose E. Roman 513d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 514d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 515d24d4204SJose E. Roman 516d24d4204SJose E. Roman /* broadcast along process rows */ 517d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 518d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 519d24d4204SJose E. Roman 520d24d4204SJose E. Roman /* local scaling */ 521d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 522d24d4204SJose E. Roman 523d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 524d24d4204SJose E. Roman ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 525d24d4204SJose E. Roman } 526d24d4204SJose E. Roman PetscFunctionReturn(0); 527d24d4204SJose E. Roman } 528d24d4204SJose E. Roman 529d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 530d24d4204SJose E. Roman { 531d24d4204SJose E. Roman PetscFunctionBegin; 532d24d4204SJose E. Roman *missing = PETSC_FALSE; 533d24d4204SJose E. Roman PetscFunctionReturn(0); 534d24d4204SJose E. Roman } 535d24d4204SJose E. Roman 536d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 537d24d4204SJose E. Roman { 538d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 539d24d4204SJose E. Roman PetscBLASInt n,one=1; 540d24d4204SJose E. Roman 541d24d4204SJose E. Roman PetscFunctionBegin; 542d24d4204SJose E. Roman n = x->lld*x->locc; 543d24d4204SJose E. Roman PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 544d24d4204SJose E. Roman PetscFunctionReturn(0); 545d24d4204SJose E. Roman } 546d24d4204SJose E. Roman 547d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 548d24d4204SJose E. Roman { 549d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 550d24d4204SJose E. Roman PetscBLASInt i,n; 551d24d4204SJose E. Roman PetscScalar v; 552d24d4204SJose E. Roman 553d24d4204SJose E. Roman PetscFunctionBegin; 554d24d4204SJose E. Roman n = PetscMin(x->M,x->N); 555d24d4204SJose E. Roman for (i=1;i<=n;i++) { 556d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 557d24d4204SJose E. Roman v += alpha; 558d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 559d24d4204SJose E. Roman } 560d24d4204SJose E. Roman PetscFunctionReturn(0); 561d24d4204SJose E. Roman } 562d24d4204SJose E. Roman 563d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 564d24d4204SJose E. Roman { 565d24d4204SJose E. Roman PetscErrorCode ierr; 566d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 567d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 568d24d4204SJose E. Roman PetscBLASInt one=1; 569d24d4204SJose E. Roman PetscScalar beta=1.0; 570d24d4204SJose E. Roman 571d24d4204SJose E. Roman PetscFunctionBegin; 572d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y,1,X,3); 573d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 574d24d4204SJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 575d24d4204SJose E. Roman PetscFunctionReturn(0); 576d24d4204SJose E. Roman } 577d24d4204SJose E. Roman 578d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 579d24d4204SJose E. Roman { 580d24d4204SJose E. Roman PetscErrorCode ierr; 581d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 582d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 583d24d4204SJose E. Roman 584d24d4204SJose E. Roman PetscFunctionBegin; 585d24d4204SJose E. Roman ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 586d24d4204SJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 587d24d4204SJose E. Roman PetscFunctionReturn(0); 588d24d4204SJose E. Roman } 589d24d4204SJose E. Roman 590d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 591d24d4204SJose E. Roman { 592d24d4204SJose E. Roman Mat Bs; 593d24d4204SJose E. Roman MPI_Comm comm; 594d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 595d24d4204SJose E. Roman PetscErrorCode ierr; 596d24d4204SJose E. Roman 597d24d4204SJose E. Roman PetscFunctionBegin; 598d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 599d24d4204SJose E. Roman ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 600d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 601d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 602d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 603d24d4204SJose E. Roman b->M = a->M; 604d24d4204SJose E. Roman b->N = a->N; 605d24d4204SJose E. Roman b->mb = a->mb; 606d24d4204SJose E. Roman b->nb = a->nb; 607d24d4204SJose E. Roman b->rsrc = a->rsrc; 608d24d4204SJose E. Roman b->csrc = a->csrc; 609d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 610d24d4204SJose E. Roman *B = Bs; 611d24d4204SJose E. Roman if (op == MAT_COPY_VALUES) { 612d24d4204SJose E. Roman ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 613d24d4204SJose E. Roman } 614d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 615d24d4204SJose E. Roman PetscFunctionReturn(0); 616d24d4204SJose E. Roman } 617d24d4204SJose E. Roman 618d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 619d24d4204SJose E. Roman { 620d24d4204SJose E. Roman PetscErrorCode ierr; 621d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 622d24d4204SJose E. Roman Mat Bs = *B; 623d24d4204SJose E. Roman PetscBLASInt one=1; 624d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 625d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 626d24d4204SJose E. Roman PetscInt i,j; 627d24d4204SJose E. Roman #endif 628d24d4204SJose E. Roman 629d24d4204SJose E. Roman PetscFunctionBegin; 630d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 63159172f18SJose E. Roman ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr); 632d24d4204SJose E. Roman *B = Bs; 633d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 634d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 635d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 636d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 637d24d4204SJose E. Roman /* undo conjugation */ 638d24d4204SJose E. Roman for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 639d24d4204SJose E. Roman #endif 640d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 641d24d4204SJose E. Roman PetscFunctionReturn(0); 642d24d4204SJose E. Roman } 643d24d4204SJose E. Roman 644d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 645d24d4204SJose E. Roman { 646d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 647d24d4204SJose E. Roman PetscInt i,j; 648d24d4204SJose E. Roman 649d24d4204SJose E. Roman PetscFunctionBegin; 650d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 651d24d4204SJose E. Roman PetscFunctionReturn(0); 652d24d4204SJose E. Roman } 653d24d4204SJose E. Roman 654d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 655d24d4204SJose E. Roman { 656d24d4204SJose E. Roman PetscErrorCode ierr; 657d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 658d24d4204SJose E. Roman Mat Bs = *B; 659d24d4204SJose E. Roman PetscBLASInt one=1; 660d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 661d24d4204SJose E. Roman 662d24d4204SJose E. Roman PetscFunctionBegin; 663d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 66459172f18SJose E. Roman ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr); 665d24d4204SJose E. Roman *B = Bs; 666d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 667d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 668d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 669d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 670d24d4204SJose E. Roman PetscFunctionReturn(0); 671d24d4204SJose E. Roman } 672d24d4204SJose E. Roman 673d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 674d24d4204SJose E. Roman { 675d24d4204SJose E. Roman PetscErrorCode ierr; 676d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 677d24d4204SJose E. Roman PetscScalar *x,*x2d; 678d24d4204SJose E. Roman const PetscInt *ranges; 679d24d4204SJose E. Roman PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 680d24d4204SJose E. Roman 681d24d4204SJose E. Roman PetscFunctionBegin; 682d24d4204SJose E. Roman ierr = VecCopy(B,X);CHKERRQ(ierr); 683d24d4204SJose E. Roman ierr = VecGetArray(X,&x);CHKERRQ(ierr); 684d24d4204SJose E. Roman 685d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 686d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 687d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 688d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 689d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 690d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 691d24d4204SJose E. Roman 692d24d4204SJose E. Roman /* allocate 2d vector */ 693d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 694d24d4204SJose E. Roman ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 695d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 696d24d4204SJose E. Roman 697d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 698d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 699d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 700d24d4204SJose E. Roman 701d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 702d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 703d24d4204SJose E. Roman 704d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 705d24d4204SJose E. Roman switch (A->factortype) { 706d24d4204SJose E. Roman case MAT_FACTOR_LU: 707d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 708d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 709d24d4204SJose E. Roman break; 710d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 711d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 712d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 713d24d4204SJose E. Roman break; 714d24d4204SJose E. Roman default: 715d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 716d24d4204SJose E. Roman } 717d24d4204SJose E. Roman 718d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 719d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 720d24d4204SJose E. Roman 721d24d4204SJose E. Roman ierr = PetscFree(x2d);CHKERRQ(ierr); 722d24d4204SJose E. Roman ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 723d24d4204SJose E. Roman PetscFunctionReturn(0); 724d24d4204SJose E. Roman } 725d24d4204SJose E. Roman 726d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 727d24d4204SJose E. Roman { 728d24d4204SJose E. Roman PetscErrorCode ierr; 729d24d4204SJose E. Roman 730d24d4204SJose E. Roman PetscFunctionBegin; 731d24d4204SJose E. Roman ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 732d24d4204SJose E. Roman ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 733d24d4204SJose E. Roman PetscFunctionReturn(0); 734d24d4204SJose E. Roman } 735d24d4204SJose E. Roman 736d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 737d24d4204SJose E. Roman { 738d24d4204SJose E. Roman PetscErrorCode ierr; 739d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 740d24d4204SJose E. Roman PetscBool flg1,flg2; 741d24d4204SJose E. Roman PetscBLASInt one=1,info; 742d24d4204SJose E. Roman 743d24d4204SJose E. Roman PetscFunctionBegin; 744d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 745d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 746d24d4204SJose E. Roman if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 747d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B,1,X,2); 748d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)B->data; 749d24d4204SJose E. Roman x = (Mat_ScaLAPACK*)X->data; 750d24d4204SJose E. Roman ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 751d24d4204SJose E. Roman 752d24d4204SJose E. Roman switch (A->factortype) { 753d24d4204SJose E. Roman case MAT_FACTOR_LU: 754d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 755d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 756d24d4204SJose E. Roman break; 757d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 758d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 759d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 760d24d4204SJose E. Roman break; 761d24d4204SJose E. Roman default: 762d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 763d24d4204SJose E. Roman } 764d24d4204SJose E. Roman PetscFunctionReturn(0); 765d24d4204SJose E. Roman } 766d24d4204SJose E. Roman 767d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 768d24d4204SJose E. Roman { 769d24d4204SJose E. Roman PetscErrorCode ierr; 770d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 771d24d4204SJose E. Roman PetscBLASInt one=1,info; 772d24d4204SJose E. Roman 773d24d4204SJose E. Roman PetscFunctionBegin; 774d24d4204SJose E. Roman if (!a->pivots) { 775d24d4204SJose E. Roman ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 776d24d4204SJose E. Roman ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 777d24d4204SJose E. Roman } 778d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 779d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf",info); 780d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 781d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 782d24d4204SJose E. Roman 783d24d4204SJose E. Roman ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 784d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 785d24d4204SJose E. Roman PetscFunctionReturn(0); 786d24d4204SJose E. Roman } 787d24d4204SJose E. Roman 788d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 789d24d4204SJose E. Roman { 790d24d4204SJose E. Roman PetscErrorCode ierr; 791d24d4204SJose E. Roman 792d24d4204SJose E. Roman PetscFunctionBegin; 793d24d4204SJose E. Roman ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 794d24d4204SJose E. Roman ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 795d24d4204SJose E. Roman PetscFunctionReturn(0); 796d24d4204SJose E. Roman } 797d24d4204SJose E. Roman 798d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 799d24d4204SJose E. Roman { 800d24d4204SJose E. Roman PetscFunctionBegin; 801d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 802d24d4204SJose E. Roman PetscFunctionReturn(0); 803d24d4204SJose E. Roman } 804d24d4204SJose E. Roman 805d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 806d24d4204SJose E. Roman { 807d24d4204SJose E. Roman PetscErrorCode ierr; 808d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 809d24d4204SJose E. Roman PetscBLASInt one=1,info; 810d24d4204SJose E. Roman 811d24d4204SJose E. Roman PetscFunctionBegin; 812d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 813d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf",info); 814d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 815d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 816d24d4204SJose E. Roman 817d24d4204SJose E. Roman ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 818d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 819d24d4204SJose E. Roman PetscFunctionReturn(0); 820d24d4204SJose E. Roman } 821d24d4204SJose E. Roman 822d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 823d24d4204SJose E. Roman { 824d24d4204SJose E. Roman PetscErrorCode ierr; 825d24d4204SJose E. Roman 826d24d4204SJose E. Roman PetscFunctionBegin; 827d24d4204SJose E. Roman ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 828d24d4204SJose E. Roman ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 829d24d4204SJose E. Roman PetscFunctionReturn(0); 830d24d4204SJose E. Roman } 831d24d4204SJose E. Roman 832d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 833d24d4204SJose E. Roman { 834d24d4204SJose E. Roman PetscFunctionBegin; 835d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 836d24d4204SJose E. Roman PetscFunctionReturn(0); 837d24d4204SJose E. Roman } 838d24d4204SJose E. Roman 839d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 840d24d4204SJose E. Roman { 841d24d4204SJose E. Roman PetscFunctionBegin; 842d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 843d24d4204SJose E. Roman PetscFunctionReturn(0); 844d24d4204SJose E. Roman } 845d24d4204SJose E. Roman 846d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 847d24d4204SJose E. Roman { 848d24d4204SJose E. Roman Mat B; 84959172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 850d24d4204SJose E. Roman PetscErrorCode ierr; 851d24d4204SJose E. Roman 852d24d4204SJose E. Roman PetscFunctionBegin; 853d24d4204SJose E. Roman /* Create the factorization matrix */ 85459172f18SJose E. Roman ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr); 85566e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 856d24d4204SJose E. Roman B->factortype = ftype; 857d24d4204SJose E. Roman ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 858d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 859d24d4204SJose E. Roman 860d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 861d24d4204SJose E. Roman *F = B; 862d24d4204SJose E. Roman PetscFunctionReturn(0); 863d24d4204SJose E. Roman } 864d24d4204SJose E. Roman 865d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 866d24d4204SJose E. Roman { 867d24d4204SJose E. Roman PetscErrorCode ierr; 868d24d4204SJose E. Roman 869d24d4204SJose E. Roman PetscFunctionBegin; 870d24d4204SJose E. Roman ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 871d24d4204SJose E. Roman ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 872d24d4204SJose E. Roman PetscFunctionReturn(0); 873d24d4204SJose E. Roman } 874d24d4204SJose E. Roman 875d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 876d24d4204SJose E. Roman { 877d24d4204SJose E. Roman PetscErrorCode ierr; 878d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 879d24d4204SJose E. Roman PetscBLASInt one=1,lwork=0; 880d24d4204SJose E. Roman const char *ntype; 881d68f4f38SPierre Jolivet PetscScalar *work=NULL,dummy; 882d24d4204SJose E. Roman 883d24d4204SJose E. Roman PetscFunctionBegin; 884d24d4204SJose E. Roman switch (type) { 885d24d4204SJose E. Roman case NORM_1: 886d24d4204SJose E. Roman ntype = "1"; 887d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 888d24d4204SJose E. Roman break; 889d24d4204SJose E. Roman case NORM_FROBENIUS: 890d24d4204SJose E. Roman ntype = "F"; 891d24d4204SJose E. Roman work = &dummy; 892d24d4204SJose E. Roman break; 893d24d4204SJose E. Roman case NORM_INFINITY: 894d24d4204SJose E. Roman ntype = "I"; 895d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 896d24d4204SJose E. Roman break; 897d24d4204SJose E. Roman default: 898d24d4204SJose E. Roman SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 899d24d4204SJose E. Roman } 900d24d4204SJose E. Roman if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 901d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 902d24d4204SJose E. Roman if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 903d24d4204SJose E. Roman PetscFunctionReturn(0); 904d24d4204SJose E. Roman } 905d24d4204SJose E. Roman 906d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 907d24d4204SJose E. Roman { 908d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 909d24d4204SJose E. Roman PetscErrorCode ierr; 910d24d4204SJose E. Roman 911d24d4204SJose E. Roman PetscFunctionBegin; 912d24d4204SJose E. Roman ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 913d24d4204SJose E. Roman PetscFunctionReturn(0); 914d24d4204SJose E. Roman } 915d24d4204SJose E. Roman 916d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 917d24d4204SJose E. Roman { 918d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 919d24d4204SJose E. Roman PetscErrorCode ierr; 920d24d4204SJose E. Roman PetscInt i,n,nb,isrc,nproc,iproc,*idx; 921d24d4204SJose E. Roman 922d24d4204SJose E. Roman PetscFunctionBegin; 923d24d4204SJose E. Roman if (rows) { 924d24d4204SJose E. Roman n = a->locr; 925d24d4204SJose E. Roman nb = a->mb; 926d24d4204SJose E. Roman isrc = a->rsrc; 927d24d4204SJose E. Roman nproc = a->grid->nprow; 928d24d4204SJose E. Roman iproc = a->grid->myrow; 929d24d4204SJose E. Roman ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 930d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 931d24d4204SJose E. Roman ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 932d24d4204SJose E. Roman } 933d24d4204SJose E. Roman if (cols) { 934d24d4204SJose E. Roman n = a->locc; 935d24d4204SJose E. Roman nb = a->nb; 936d24d4204SJose E. Roman isrc = a->csrc; 937d24d4204SJose E. Roman nproc = a->grid->npcol; 938d24d4204SJose E. Roman iproc = a->grid->mycol; 939d24d4204SJose E. Roman ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 940d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 941d24d4204SJose E. Roman ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 942d24d4204SJose E. Roman } 943d24d4204SJose E. Roman PetscFunctionReturn(0); 944d24d4204SJose E. Roman } 945d24d4204SJose E. Roman 946d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 947d24d4204SJose E. Roman { 948d24d4204SJose E. Roman PetscErrorCode ierr; 949d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 950d24d4204SJose E. Roman Mat Bmpi; 951d24d4204SJose E. Roman MPI_Comm comm; 9524b1a79daSJose E. Roman PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 9534b1a79daSJose E. Roman const PetscInt *ranges,*branges,*cwork; 9544b1a79daSJose E. Roman const PetscScalar *vwork; 955d24d4204SJose E. Roman PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 956d24d4204SJose E. Roman PetscScalar *barray; 9574b1a79daSJose E. Roman PetscBool differ=PETSC_FALSE; 9584b1a79daSJose E. Roman PetscMPIInt size; 959d24d4204SJose E. Roman 960d24d4204SJose E. Roman PetscFunctionBegin; 961d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9624b1a79daSJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 9634b1a79daSJose E. Roman 9644b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 965ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 9664b1a79daSJose E. Roman ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr); 9674b1a79daSJose E. Roman for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 9684b1a79daSJose E. Roman } 9694b1a79daSJose E. Roman 9704b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 9714b1a79daSJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 9724b1a79daSJose E. Roman m = PETSC_DECIDE; 9734b1a79daSJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 9744b1a79daSJose E. Roman n = PETSC_DECIDE; 9754b1a79daSJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 9764b1a79daSJose E. Roman ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 9774b1a79daSJose E. Roman ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 9784b1a79daSJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 9794b1a79daSJose E. Roman 9804b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 9814b1a79daSJose E. Roman ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 9824b1a79daSJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 9834b1a79daSJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 9844b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit",info); 9854b1a79daSJose E. Roman 9864b1a79daSJose E. Roman /* redistribute matrix */ 9874b1a79daSJose E. Roman ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 9884b1a79daSJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 9894b1a79daSJose E. Roman ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 9904b1a79daSJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9914b1a79daSJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9924b1a79daSJose E. Roman 9934b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */ 9944b1a79daSJose E. Roman ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr); 9954b1a79daSJose E. Roman for (i=rstart;i<rend;i++) { 9964b1a79daSJose E. Roman ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 9974b1a79daSJose E. Roman ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 9984b1a79daSJose E. Roman ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 9994b1a79daSJose E. Roman } 10004b1a79daSJose E. Roman ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10014b1a79daSJose E. Roman ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10024b1a79daSJose E. Roman ierr = MatDestroy(&Bmpi);CHKERRQ(ierr); 10034b1a79daSJose E. Roman 10044b1a79daSJose E. Roman } else { /* normal cases */ 1005d24d4204SJose E. Roman 1006d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1007d24d4204SJose E. Roman else { 1008d24d4204SJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 100992c846b4SJose E. Roman m = PETSC_DECIDE; 101092c846b4SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 101192c846b4SJose E. Roman n = PETSC_DECIDE; 101292c846b4SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 101392c846b4SJose E. Roman ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1014d24d4204SJose E. Roman ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 1015d24d4204SJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1016d24d4204SJose E. Roman } 1017d24d4204SJose E. Roman 1018d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 1019d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 1020d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 1021d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 1022d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1023d24d4204SJose E. Roman 1024d24d4204SJose E. Roman /* redistribute matrix */ 1025d24d4204SJose E. Roman ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 1026d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 1027d24d4204SJose E. Roman ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 1028d24d4204SJose E. Roman 1029d24d4204SJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1030d24d4204SJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 1032d24d4204SJose E. Roman ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1033d24d4204SJose E. Roman } else *B = Bmpi; 10344b1a79daSJose E. Roman } 1035d24d4204SJose E. Roman PetscFunctionReturn(0); 1036d24d4204SJose E. Roman } 1037d24d4204SJose E. Roman 1038d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1039d24d4204SJose E. Roman { 1040d24d4204SJose E. Roman PetscErrorCode ierr; 1041d24d4204SJose E. Roman Mat_ScaLAPACK *b; 1042d24d4204SJose E. Roman Mat Bmpi; 1043d24d4204SJose E. Roman MPI_Comm comm; 104492c846b4SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1045d24d4204SJose E. Roman const PetscInt *ranges; 1046d24d4204SJose E. Roman PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1047d24d4204SJose E. Roman PetscScalar *aarray; 10484e8b52a1SJose E. Roman PetscInt lda; 1049d24d4204SJose E. Roman 1050d24d4204SJose E. Roman PetscFunctionBegin; 1051d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1052d24d4204SJose E. Roman 1053d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1054d24d4204SJose E. Roman else { 1055d24d4204SJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 105692c846b4SJose E. Roman m = PETSC_DECIDE; 105792c846b4SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 105892c846b4SJose E. Roman n = PETSC_DECIDE; 105992c846b4SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 106092c846b4SJose E. Roman ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1061d24d4204SJose E. Roman ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 1062d24d4204SJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1063d24d4204SJose E. Roman } 1064d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bmpi->data; 1065d24d4204SJose E. Roman 1066d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 1067d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1068d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 10694e8b52a1SJose E. Roman ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 10704e8b52a1SJose E. Roman lld = PetscMax(lda,1); /* local leading dimension */ 1071d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1072d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1073d24d4204SJose E. Roman 1074d24d4204SJose E. Roman /* redistribute matrix */ 1075d24d4204SJose E. Roman ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1076d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1077d24d4204SJose E. Roman ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1078d24d4204SJose E. Roman 1079d24d4204SJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1080d24d4204SJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1081d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 1082d24d4204SJose E. Roman ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1083d24d4204SJose E. Roman } else *B = Bmpi; 1084d24d4204SJose E. Roman PetscFunctionReturn(0); 1085d24d4204SJose E. Roman } 1086d24d4204SJose E. Roman 1087d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1088d24d4204SJose E. Roman { 1089d24d4204SJose E. Roman Mat mat_scal; 1090d24d4204SJose E. Roman PetscErrorCode ierr; 1091d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1092d24d4204SJose E. Roman const PetscInt *cols; 1093d24d4204SJose E. Roman const PetscScalar *vals; 1094d24d4204SJose E. Roman 1095d24d4204SJose E. Roman PetscFunctionBegin; 1096d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1097d24d4204SJose E. Roman mat_scal = *newmat; 1098d24d4204SJose E. Roman ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1099d24d4204SJose E. Roman } else { 1100d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1101d24d4204SJose E. Roman m = PETSC_DECIDE; 1102d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1103d24d4204SJose E. Roman n = PETSC_DECIDE; 1104d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1105d24d4204SJose E. Roman ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1106d24d4204SJose E. Roman ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1107d24d4204SJose E. Roman ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1108d24d4204SJose E. Roman } 1109d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 1110d24d4204SJose E. Roman ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1111d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1112d24d4204SJose E. Roman ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1113d24d4204SJose E. Roman } 1114d24d4204SJose E. Roman ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115d24d4204SJose E. Roman ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1116d24d4204SJose E. Roman 1117d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1118d24d4204SJose E. Roman else *newmat = mat_scal; 1119d24d4204SJose E. Roman PetscFunctionReturn(0); 1120d24d4204SJose E. Roman } 1121d24d4204SJose E. Roman 1122d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1123d24d4204SJose E. Roman { 1124d24d4204SJose E. Roman Mat mat_scal; 1125d24d4204SJose E. Roman PetscErrorCode ierr; 1126d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1127d24d4204SJose E. Roman const PetscInt *cols; 1128d24d4204SJose E. Roman const PetscScalar *vals; 1129d24d4204SJose E. Roman PetscScalar v; 1130d24d4204SJose E. Roman 1131d24d4204SJose E. Roman PetscFunctionBegin; 1132d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1133d24d4204SJose E. Roman mat_scal = *newmat; 1134d24d4204SJose E. Roman ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1135d24d4204SJose E. Roman } else { 1136d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1137d24d4204SJose E. Roman m = PETSC_DECIDE; 1138d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1139d24d4204SJose E. Roman n = PETSC_DECIDE; 1140d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1141d24d4204SJose E. Roman ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1142d24d4204SJose E. Roman ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1143d24d4204SJose E. Roman ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1144d24d4204SJose E. Roman } 1145d24d4204SJose E. Roman ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1146d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 1147d24d4204SJose E. Roman ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1148d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1149d24d4204SJose E. Roman for (j=0;j<ncols;j++) { /* lower triangular part */ 1150d24d4204SJose E. Roman if (cols[j] == row) continue; 1151d24d4204SJose E. Roman v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1152d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1153d24d4204SJose E. Roman } 1154d24d4204SJose E. Roman ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1155d24d4204SJose E. Roman } 1156d24d4204SJose E. Roman ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1157d24d4204SJose E. Roman ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158d24d4204SJose E. Roman ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159d24d4204SJose E. Roman 1160d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1161d24d4204SJose E. Roman else *newmat = mat_scal; 1162d24d4204SJose E. Roman PetscFunctionReturn(0); 1163d24d4204SJose E. Roman } 1164d24d4204SJose E. Roman 1165d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1166d24d4204SJose E. Roman { 1167d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1168d24d4204SJose E. Roman PetscErrorCode ierr; 1169d24d4204SJose E. Roman PetscInt sz=0; 1170d24d4204SJose E. Roman 1171d24d4204SJose E. Roman PetscFunctionBegin; 1172d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1173d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1174d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1175d24d4204SJose E. Roman 1176d24d4204SJose E. Roman ierr = PetscFree(a->loc);CHKERRQ(ierr); 1177d24d4204SJose E. Roman ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1178d24d4204SJose E. Roman ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1179d24d4204SJose E. Roman ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1180d24d4204SJose E. Roman 1181d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1182d24d4204SJose E. Roman PetscFunctionReturn(0); 1183d24d4204SJose E. Roman } 1184d24d4204SJose E. Roman 1185d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1186d24d4204SJose E. Roman { 1187d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1188d24d4204SJose E. Roman PetscErrorCode ierr; 1189d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1190d24d4204SJose E. Roman PetscBool flg; 1191d24d4204SJose E. Roman MPI_Comm icomm; 1192d24d4204SJose E. Roman 1193d24d4204SJose E. Roman PetscFunctionBegin; 1194d24d4204SJose E. Roman ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1195d24d4204SJose E. Roman ierr = PetscFree(a->loc);CHKERRQ(ierr); 1196d24d4204SJose E. Roman ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1197d24d4204SJose E. Roman ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1198ffc4695bSBarry Smith ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr); 1199d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1200d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1201d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1202d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 1203d24d4204SJose E. Roman ierr = PetscFree(grid);CHKERRQ(ierr); 1204ffc4695bSBarry Smith ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr); 1205d24d4204SJose E. Roman } 1206d24d4204SJose E. Roman ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1207d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1208d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1209d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1210d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1211d24d4204SJose E. Roman ierr = PetscFree(A->data);CHKERRQ(ierr); 1212d24d4204SJose E. Roman PetscFunctionReturn(0); 1213d24d4204SJose E. Roman } 1214d24d4204SJose E. Roman 1215d24d4204SJose E. Roman PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1216d24d4204SJose E. Roman { 1217d24d4204SJose E. Roman PetscErrorCode ierr; 1218d24d4204SJose E. Roman const PetscInt *ranges; 1219d24d4204SJose E. Roman PetscMPIInt size; 1220d24d4204SJose E. Roman PetscInt i,n; 1221d24d4204SJose E. Roman 1222d24d4204SJose E. Roman PetscFunctionBegin; 1223ffc4695bSBarry Smith ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr); 1224d24d4204SJose E. Roman if (size>2) { 1225d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1226d24d4204SJose E. Roman n = ranges[1]-ranges[0]; 1227d24d4204SJose E. Roman for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1228d24d4204SJose E. Roman if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1229d24d4204SJose E. Roman } 1230d24d4204SJose E. Roman PetscFunctionReturn(0); 1231d24d4204SJose E. Roman } 1232d24d4204SJose E. Roman 1233d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1234d24d4204SJose E. Roman { 1235d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1236d24d4204SJose E. Roman PetscErrorCode ierr; 1237d24d4204SJose E. Roman PetscBLASInt info=0; 1238d24d4204SJose E. Roman 1239d24d4204SJose E. Roman PetscFunctionBegin; 1240d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1241d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1242d24d4204SJose E. Roman 1243d24d4204SJose E. Roman /* check that the layout is as enforced by MatCreateScaLAPACK */ 1244d24d4204SJose E. Roman ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1245d24d4204SJose E. Roman ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1246d24d4204SJose E. Roman 1247d24d4204SJose E. Roman /* compute local sizes */ 1248d24d4204SJose E. Roman ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1249d24d4204SJose E. Roman ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1250d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1251d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1252d24d4204SJose E. Roman a->lld = PetscMax(1,a->locr); 1253d24d4204SJose E. Roman 1254d24d4204SJose E. Roman /* allocate local array */ 1255d24d4204SJose E. Roman ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1256d24d4204SJose E. Roman 1257d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1258d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1259d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1260d24d4204SJose E. Roman PetscFunctionReturn(0); 1261d24d4204SJose E. Roman } 1262d24d4204SJose E. Roman 1263d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1264d24d4204SJose E. Roman { 1265d24d4204SJose E. Roman PetscErrorCode ierr; 1266d24d4204SJose E. Roman PetscInt nstash,reallocs; 1267d24d4204SJose E. Roman 1268d24d4204SJose E. Roman PetscFunctionBegin; 1269d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1270d24d4204SJose E. Roman ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1271d24d4204SJose E. Roman ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1272d24d4204SJose E. Roman ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1273d24d4204SJose E. Roman PetscFunctionReturn(0); 1274d24d4204SJose E. Roman } 1275d24d4204SJose E. Roman 1276d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1277d24d4204SJose E. Roman { 1278d24d4204SJose E. Roman PetscErrorCode ierr; 1279d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1280d24d4204SJose E. Roman PetscMPIInt n; 1281d24d4204SJose E. Roman PetscInt i,flg,*row,*col; 1282d24d4204SJose E. Roman PetscScalar *val; 1283d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1284d24d4204SJose E. Roman 1285d24d4204SJose E. Roman PetscFunctionBegin; 1286d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1287d24d4204SJose E. Roman while (1) { 1288d24d4204SJose E. Roman ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1289d24d4204SJose E. Roman if (!flg) break; 1290d24d4204SJose E. Roman for (i=0;i<n;i++) { 1291d24d4204SJose E. Roman ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1292d24d4204SJose E. Roman ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1293d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1294d8185827SBarry Smith if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1295d24d4204SJose E. Roman switch (A->insertmode) { 1296d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1297d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1298d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1299d24d4204SJose E. Roman } 1300d24d4204SJose E. Roman } 1301d24d4204SJose E. Roman } 1302d24d4204SJose E. Roman ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1303d24d4204SJose E. Roman PetscFunctionReturn(0); 1304d24d4204SJose E. Roman } 1305d24d4204SJose E. Roman 1306d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1307d24d4204SJose E. Roman { 1308d24d4204SJose E. Roman PetscErrorCode ierr; 1309d24d4204SJose E. Roman Mat Adense,As; 1310d24d4204SJose E. Roman MPI_Comm comm; 1311d24d4204SJose E. Roman 1312d24d4204SJose E. Roman PetscFunctionBegin; 1313d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1314d24d4204SJose E. Roman ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1315d24d4204SJose E. Roman ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1316d24d4204SJose E. Roman ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1317d24d4204SJose E. Roman ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1318d24d4204SJose E. Roman ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1319d24d4204SJose E. Roman ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1320d24d4204SJose E. Roman PetscFunctionReturn(0); 1321d24d4204SJose E. Roman } 1322d24d4204SJose E. Roman 1323d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 1324d24d4204SJose E. Roman static struct _MatOps MatOps_Values = { 1325d24d4204SJose E. Roman MatSetValues_ScaLAPACK, 1326d24d4204SJose E. Roman 0, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman MatMult_ScaLAPACK, 1329d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1330d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1331d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1332d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1333d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1334d24d4204SJose E. Roman 0, 1335d24d4204SJose E. Roman /*10*/ 0, 1336d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1337d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1340d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1341d24d4204SJose E. Roman 0, 1342d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1343d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1344d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1345d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1346d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1347d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1348d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1349d24d4204SJose E. Roman /*24*/ 0, 1350d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1351d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1352d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1353d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1354d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1355d24d4204SJose E. Roman 0, 1356d24d4204SJose E. Roman 0, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1360d24d4204SJose E. Roman 0, 1361d24d4204SJose E. Roman 0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1365d24d4204SJose E. Roman 0, 1366d24d4204SJose E. Roman 0, 1367d24d4204SJose E. Roman 0, 1368d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1369d24d4204SJose E. Roman /*44*/ 0, 1370d24d4204SJose E. Roman MatScale_ScaLAPACK, 1371d24d4204SJose E. Roman MatShift_ScaLAPACK, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman 0, 1374d24d4204SJose E. Roman /*49*/ 0, 1375d24d4204SJose E. Roman 0, 1376d24d4204SJose E. Roman 0, 1377d24d4204SJose E. Roman 0, 1378d24d4204SJose E. Roman 0, 1379d24d4204SJose E. Roman /*54*/ 0, 1380d24d4204SJose E. Roman 0, 1381d24d4204SJose E. Roman 0, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman /*59*/ 0, 1385d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1386d24d4204SJose E. Roman MatView_ScaLAPACK, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman /*64*/ 0, 1390d24d4204SJose E. Roman 0, 1391d24d4204SJose E. Roman 0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman /*69*/ 0, 1395d24d4204SJose E. Roman 0, 1396d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1397d24d4204SJose E. Roman 0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman /*74*/ 0, 1400d24d4204SJose E. Roman 0, 1401d24d4204SJose E. Roman 0, 1402d24d4204SJose E. Roman 0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman /*79*/ 0, 1405d24d4204SJose E. Roman 0, 1406d24d4204SJose E. Roman 0, 1407d24d4204SJose E. Roman 0, 1408d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1409d24d4204SJose E. Roman /*84*/ 0, 1410d24d4204SJose E. Roman 0, 1411d24d4204SJose E. Roman 0, 1412d24d4204SJose E. Roman 0, 1413d24d4204SJose E. Roman 0, 1414d24d4204SJose E. Roman /*89*/ 0, 1415d24d4204SJose E. Roman 0, 1416d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1417d24d4204SJose E. Roman 0, 1418d24d4204SJose E. Roman 0, 1419d24d4204SJose E. Roman /*94*/ 0, 1420d24d4204SJose E. Roman 0, 1421d24d4204SJose E. Roman 0, 1422d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1423d24d4204SJose E. Roman 0, 1424d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1425d24d4204SJose E. Roman 0, 1426d24d4204SJose E. Roman 0, 1427d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1428d24d4204SJose E. Roman 0, 1429d24d4204SJose E. Roman /*104*/0, 1430d24d4204SJose E. Roman 0, 1431d24d4204SJose E. Roman 0, 1432d24d4204SJose E. Roman 0, 1433d24d4204SJose E. Roman 0, 1434d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK, 1435d24d4204SJose E. Roman 0, 1436d24d4204SJose E. Roman 0, 1437d24d4204SJose E. Roman 0, 1438d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1439d24d4204SJose E. Roman /*114*/0, 1440d24d4204SJose E. Roman 0, 1441d24d4204SJose E. Roman 0, 1442d24d4204SJose E. Roman 0, 1443d24d4204SJose E. Roman 0, 1444d24d4204SJose E. Roman /*119*/0, 1445d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1446d24d4204SJose E. Roman 0, 1447d24d4204SJose E. Roman 0, 1448d24d4204SJose E. Roman 0, 1449d24d4204SJose E. Roman /*124*/0, 1450d24d4204SJose E. Roman 0, 1451d24d4204SJose E. Roman 0, 1452d24d4204SJose E. Roman 0, 1453d24d4204SJose E. Roman 0, 1454d24d4204SJose E. Roman /*129*/0, 1455d24d4204SJose E. Roman 0, 1456d24d4204SJose E. Roman 0, 1457d24d4204SJose E. Roman 0, 1458d24d4204SJose E. Roman 0, 1459d24d4204SJose E. Roman /*134*/0, 1460d24d4204SJose E. Roman 0, 1461d24d4204SJose E. Roman 0, 1462d24d4204SJose E. Roman 0, 1463d24d4204SJose E. Roman 0, 1464d24d4204SJose E. Roman 0, 1465d24d4204SJose E. Roman /*140*/0, 1466d24d4204SJose E. Roman 0, 1467d24d4204SJose E. Roman 0, 1468d24d4204SJose E. Roman 0, 1469d24d4204SJose E. Roman 0, 1470d24d4204SJose E. Roman /*145*/0, 1471d24d4204SJose E. Roman 0, 1472d24d4204SJose E. Roman 0 1473d24d4204SJose E. Roman }; 1474d24d4204SJose E. Roman 1475d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1476d24d4204SJose E. Roman { 1477d24d4204SJose E. Roman PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1478d24d4204SJose E. Roman PetscInt size=stash->size,nsends; 1479d24d4204SJose E. Roman PetscErrorCode ierr; 1480d24d4204SJose E. Roman PetscInt count,*sindices,**rindices,i,j,l; 1481d24d4204SJose E. Roman PetscScalar **rvalues,*svalues; 1482d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1483d24d4204SJose E. Roman MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1484d24d4204SJose E. Roman PetscMPIInt *sizes,*nlengths,nreceives; 1485d24d4204SJose E. Roman PetscInt *sp_idx,*sp_idy; 1486d24d4204SJose E. Roman PetscScalar *sp_val; 1487d24d4204SJose E. Roman PetscMatStashSpace space,space_next; 1488d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1489d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1490d24d4204SJose E. Roman 1491d24d4204SJose E. Roman PetscFunctionBegin; 1492d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1493d24d4204SJose E. Roman InsertMode addv; 1494820f2d46SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1495d24d4204SJose E. Roman if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1496d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1497d24d4204SJose E. Roman } 1498d24d4204SJose E. Roman 1499d24d4204SJose E. Roman bs2 = stash->bs*stash->bs; 1500d24d4204SJose E. Roman 1501d24d4204SJose E. Roman /* first count number of contributors to each processor */ 1502d24d4204SJose E. Roman ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1503d24d4204SJose E. Roman ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1504d24d4204SJose E. Roman 1505d24d4204SJose E. Roman i = j = 0; 1506d24d4204SJose E. Roman space = stash->space_head; 1507d24d4204SJose E. Roman while (space) { 1508d24d4204SJose E. Roman space_next = space->next; 1509d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1510d24d4204SJose E. Roman ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1511d24d4204SJose E. Roman ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1512d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1513d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1514d24d4204SJose E. Roman nlengths[j]++; owner[i] = j; 1515d24d4204SJose E. Roman i++; 1516d24d4204SJose E. Roman } 1517d24d4204SJose E. Roman space = space_next; 1518d24d4204SJose E. Roman } 1519d24d4204SJose E. Roman 1520d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 1521d24d4204SJose E. Roman ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1522d24d4204SJose E. Roman for (i=0, nsends=0; i<size; i++) { 1523d24d4204SJose E. Roman if (nlengths[i]) { 1524d24d4204SJose E. Roman sizes[i] = 1; nsends++; 1525d24d4204SJose E. Roman } 1526d24d4204SJose E. Roman } 1527d24d4204SJose E. Roman 1528d24d4204SJose E. Roman {PetscMPIInt *onodes,*olengths; 1529d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 1530d24d4204SJose E. Roman ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1531d24d4204SJose E. Roman ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1532d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1533d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] *=2; 1534d24d4204SJose E. Roman ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1535d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1536d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1537d24d4204SJose E. Roman ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1538d24d4204SJose E. Roman ierr = PetscFree(onodes);CHKERRQ(ierr); 1539d24d4204SJose E. Roman ierr = PetscFree(olengths);CHKERRQ(ierr);} 1540d24d4204SJose E. Roman 1541d24d4204SJose E. Roman /* do sends: 1542d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1543d24d4204SJose E. Roman the ith processor 1544d24d4204SJose E. Roman */ 1545d24d4204SJose E. Roman ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1546d24d4204SJose E. Roman ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1547d24d4204SJose E. Roman ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1548d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 1549d24d4204SJose E. Roman startv[0] = 0; starti[0] = 0; 1550d24d4204SJose E. Roman for (i=1; i<size; i++) { 1551d24d4204SJose E. Roman startv[i] = startv[i-1] + nlengths[i-1]; 1552d24d4204SJose E. Roman starti[i] = starti[i-1] + 2*nlengths[i-1]; 1553d24d4204SJose E. Roman } 1554d24d4204SJose E. Roman 1555d24d4204SJose E. Roman i = 0; 1556d24d4204SJose E. Roman space = stash->space_head; 1557d24d4204SJose E. Roman while (space) { 1558d24d4204SJose E. Roman space_next = space->next; 1559d24d4204SJose E. Roman sp_idx = space->idx; 1560d24d4204SJose E. Roman sp_idy = space->idy; 1561d24d4204SJose E. Roman sp_val = space->val; 1562d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1563d24d4204SJose E. Roman j = owner[i]; 1564d24d4204SJose E. Roman if (bs2 == 1) { 1565d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1566d24d4204SJose E. Roman } else { 1567d24d4204SJose E. Roman PetscInt k; 1568d24d4204SJose E. Roman PetscScalar *buf1,*buf2; 1569d24d4204SJose E. Roman buf1 = svalues+bs2*startv[j]; 1570d24d4204SJose E. Roman buf2 = space->val + bs2*l; 1571d24d4204SJose E. Roman for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1572d24d4204SJose E. Roman } 1573d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1574d24d4204SJose E. Roman sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1575d24d4204SJose E. Roman startv[j]++; 1576d24d4204SJose E. Roman starti[j]++; 1577d24d4204SJose E. Roman i++; 1578d24d4204SJose E. Roman } 1579d24d4204SJose E. Roman space = space_next; 1580d24d4204SJose E. Roman } 1581d24d4204SJose E. Roman startv[0] = 0; 1582d24d4204SJose E. Roman for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1583d24d4204SJose E. Roman 1584d24d4204SJose E. Roman for (i=0,count=0; i<size; i++) { 1585d24d4204SJose E. Roman if (sizes[i]) { 1586ffc4695bSBarry Smith ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr); 1587ffc4695bSBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr); 1588d24d4204SJose E. Roman } 1589d24d4204SJose E. Roman } 1590d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 1591d24d4204SJose E. Roman ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1592d24d4204SJose E. Roman for (i=0; i<size; i++) { 1593d24d4204SJose E. Roman if (sizes[i]) { 1594d24d4204SJose E. Roman ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1595d24d4204SJose E. Roman } 1596d24d4204SJose E. Roman } 1597d24d4204SJose E. Roman #endif 1598d24d4204SJose E. Roman ierr = PetscFree(nlengths);CHKERRQ(ierr); 1599d24d4204SJose E. Roman ierr = PetscFree(owner);CHKERRQ(ierr); 1600d24d4204SJose E. Roman ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1601d24d4204SJose E. Roman ierr = PetscFree(sizes);CHKERRQ(ierr); 1602d24d4204SJose E. Roman 1603d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1604d24d4204SJose E. Roman ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1605d24d4204SJose E. Roman 1606d24d4204SJose E. Roman for (i=0; i<nreceives; i++) { 1607d24d4204SJose E. Roman recv_waits[2*i] = recv_waits1[i]; 1608d24d4204SJose E. Roman recv_waits[2*i+1] = recv_waits2[i]; 1609d24d4204SJose E. Roman } 1610d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1611d24d4204SJose E. Roman 1612d24d4204SJose E. Roman ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1613d24d4204SJose E. Roman ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1614d24d4204SJose E. Roman 1615d24d4204SJose E. Roman stash->svalues = svalues; 1616d24d4204SJose E. Roman stash->sindices = sindices; 1617d24d4204SJose E. Roman stash->rvalues = rvalues; 1618d24d4204SJose E. Roman stash->rindices = rindices; 1619d24d4204SJose E. Roman stash->send_waits = send_waits; 1620d24d4204SJose E. Roman stash->nsends = nsends; 1621d24d4204SJose E. Roman stash->nrecvs = nreceives; 1622d24d4204SJose E. Roman stash->reproduce_count = 0; 1623d24d4204SJose E. Roman PetscFunctionReturn(0); 1624d24d4204SJose E. Roman } 1625d24d4204SJose E. Roman 1626d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1627d24d4204SJose E. Roman { 1628d24d4204SJose E. Roman PetscErrorCode ierr; 1629d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1630d24d4204SJose E. Roman 1631d24d4204SJose E. Roman PetscFunctionBegin; 1632d24d4204SJose E. Roman if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1633d24d4204SJose E. Roman if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1634d24d4204SJose E. Roman if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1635d24d4204SJose E. Roman ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1636d24d4204SJose E. Roman ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1637d24d4204SJose E. Roman PetscFunctionReturn(0); 1638d24d4204SJose E. Roman } 1639d24d4204SJose E. Roman 1640d24d4204SJose E. Roman /*@ 1641d24d4204SJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1642d24d4204SJose E. Roman the ScaLAPACK matrix 1643d24d4204SJose E. Roman 1644d24d4204SJose E. Roman Logically Collective on A 1645d24d4204SJose E. Roman 1646*d8d19677SJose E. Roman Input Parameters: 1647d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1648d24d4204SJose E. Roman . mb - the row block size 1649d24d4204SJose E. Roman - nb - the column block size 1650d24d4204SJose E. Roman 1651d24d4204SJose E. Roman Level: intermediate 1652d24d4204SJose E. Roman 1653d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1654d24d4204SJose E. Roman @*/ 1655d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1656d24d4204SJose E. Roman { 1657d24d4204SJose E. Roman PetscErrorCode ierr; 1658d24d4204SJose E. Roman 1659d24d4204SJose E. Roman PetscFunctionBegin; 1660d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1661d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,mb,2); 1662d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,nb,3); 1663d24d4204SJose E. Roman ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1664d24d4204SJose E. Roman PetscFunctionReturn(0); 1665d24d4204SJose E. Roman } 1666d24d4204SJose E. Roman 1667d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1668d24d4204SJose E. Roman { 1669d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1670d24d4204SJose E. Roman 1671d24d4204SJose E. Roman PetscFunctionBegin; 1672d24d4204SJose E. Roman if (mb) *mb = a->mb; 1673d24d4204SJose E. Roman if (nb) *nb = a->nb; 1674d24d4204SJose E. Roman PetscFunctionReturn(0); 1675d24d4204SJose E. Roman } 1676d24d4204SJose E. Roman 1677d24d4204SJose E. Roman /*@ 1678d24d4204SJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1679d24d4204SJose E. Roman the ScaLAPACK matrix 1680d24d4204SJose E. Roman 1681d24d4204SJose E. Roman Not collective 1682d24d4204SJose E. Roman 1683d24d4204SJose E. Roman Input Parameter: 1684d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1685d24d4204SJose E. Roman 1686d24d4204SJose E. Roman Output Parameters: 1687d24d4204SJose E. Roman + mb - the row block size 1688d24d4204SJose E. Roman - nb - the column block size 1689d24d4204SJose E. Roman 1690d24d4204SJose E. Roman Level: intermediate 1691d24d4204SJose E. Roman 1692d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1693d24d4204SJose E. Roman @*/ 1694d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1695d24d4204SJose E. Roman { 1696d24d4204SJose E. Roman PetscErrorCode ierr; 1697d24d4204SJose E. Roman 1698d24d4204SJose E. Roman PetscFunctionBegin; 1699d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1700d24d4204SJose E. Roman ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1701d24d4204SJose E. Roman PetscFunctionReturn(0); 1702d24d4204SJose E. Roman } 1703d24d4204SJose E. Roman 1704d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1705d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1706d24d4204SJose E. Roman 1707d24d4204SJose E. Roman /*MC 1708d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1709d24d4204SJose E. Roman 1710d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1711d24d4204SJose E. Roman 1712d24d4204SJose E. Roman Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1713d24d4204SJose E. Roman 1714d24d4204SJose E. Roman Options Database Keys: 1715d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1716d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1717d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1718d24d4204SJose E. Roman 1719d24d4204SJose E. Roman Level: beginner 1720d24d4204SJose E. Roman 1721d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL 1722d24d4204SJose E. Roman M*/ 1723d24d4204SJose E. Roman 1724d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1725d24d4204SJose E. Roman { 1726d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1727d24d4204SJose E. Roman PetscErrorCode ierr; 1728d24d4204SJose E. Roman PetscBool flg,flg1; 1729d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1730d24d4204SJose E. Roman MPI_Comm icomm; 1731d24d4204SJose E. Roman PetscBLASInt nprow,npcol,myrow,mycol; 1732d24d4204SJose E. Roman PetscInt optv1,k=2,array[2]={0,0}; 1733d24d4204SJose E. Roman PetscMPIInt size; 1734d24d4204SJose E. Roman 1735d24d4204SJose E. Roman PetscFunctionBegin; 1736d24d4204SJose E. Roman ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1737d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1738d24d4204SJose E. Roman 1739d24d4204SJose E. Roman ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1740d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1741d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1742d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1743d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1744d24d4204SJose E. Roman 1745d24d4204SJose E. Roman ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1746d24d4204SJose E. Roman A->data = (void*)a; 1747d24d4204SJose E. Roman 1748d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1749d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1750ffc4695bSBarry Smith ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr); 1751f7ec113fSDamian Marek ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr); 175227e75052SPierre Jolivet ierr = PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);CHKERRQ(ierr); 1753d24d4204SJose E. Roman } 1754d24d4204SJose E. Roman ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1755ffc4695bSBarry Smith ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr); 1756d24d4204SJose E. Roman if (!flg) { 1757d24d4204SJose E. Roman ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1758d24d4204SJose E. Roman 1759ffc4695bSBarry Smith ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr); 1760d24d4204SJose E. Roman grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1761d24d4204SJose E. Roman 1762d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1763d24d4204SJose E. Roman ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1764d24d4204SJose E. Roman if (flg1) { 1765d24d4204SJose E. Roman if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1766d24d4204SJose E. Roman grid->nprow = optv1; 1767d24d4204SJose E. Roman } 1768d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1769d24d4204SJose E. Roman 1770d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1771d24d4204SJose E. Roman grid->npcol = size/grid->nprow; 1772d24d4204SJose E. Roman ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1773d24d4204SJose E. Roman ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1774f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm); 1775d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1776d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1777d24d4204SJose E. Roman grid->grid_refct = 1; 1778d24d4204SJose E. Roman grid->nprow = nprow; 1779d24d4204SJose E. Roman grid->npcol = npcol; 1780d24d4204SJose E. Roman grid->myrow = myrow; 1781d24d4204SJose E. Roman grid->mycol = mycol; 1782d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1783f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm); 1784d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1785f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm); 1786d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1787ffc4695bSBarry Smith ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr); 1788d24d4204SJose E. Roman 1789d24d4204SJose E. Roman } else grid->grid_refct++; 1790d24d4204SJose E. Roman ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1791d24d4204SJose E. Roman a->grid = grid; 1792d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1793d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1794d24d4204SJose E. Roman 1795d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1796d24d4204SJose E. Roman ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1797d24d4204SJose E. Roman if (flg) { 1798d24d4204SJose E. Roman a->mb = array[0]; 1799d24d4204SJose E. Roman a->nb = (k>1)? array[1]: a->mb; 1800d24d4204SJose E. Roman } 1801d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1802d24d4204SJose E. Roman 1803d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1804d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1805d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1806d24d4204SJose E. Roman ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1807d24d4204SJose E. Roman PetscFunctionReturn(0); 1808d24d4204SJose E. Roman } 1809d24d4204SJose E. Roman 1810d24d4204SJose E. Roman /*@C 1811d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1812d24d4204SJose E. Roman (2D block cyclic distribution). 1813d24d4204SJose E. Roman 1814d24d4204SJose E. Roman Collective 1815d24d4204SJose E. Roman 1816d24d4204SJose E. Roman Input Parameters: 1817d24d4204SJose E. Roman + comm - MPI communicator 1818d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1819d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1820d24d4204SJose E. Roman . M - number of global rows 1821d24d4204SJose E. Roman . N - number of global columns 1822d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1823d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1824d24d4204SJose E. Roman 1825d24d4204SJose E. Roman Output Parameter: 1826d24d4204SJose E. Roman . A - the matrix 1827d24d4204SJose E. Roman 1828d24d4204SJose E. Roman Options Database Keys: 1829d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1830d24d4204SJose E. Roman 1831d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1832d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1833d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1834d24d4204SJose E. Roman 1835d24d4204SJose E. Roman Notes: 1836d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1837d24d4204SJose E. Roman is chosen. 1838d24d4204SJose E. Roman 1839d24d4204SJose E. Roman Storage Information: 1840d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1841d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1842d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1843d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1844d24d4204SJose E. Roman 1845d24d4204SJose E. Roman Level: intermediate 1846d24d4204SJose E. Roman 1847d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1848d24d4204SJose E. Roman @*/ 1849d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1850d24d4204SJose E. Roman { 1851d24d4204SJose E. Roman PetscErrorCode ierr; 1852d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1853d24d4204SJose E. Roman PetscInt m,n; 1854d24d4204SJose E. Roman 1855d24d4204SJose E. Roman PetscFunctionBegin; 1856d24d4204SJose E. Roman ierr = MatCreate(comm,A);CHKERRQ(ierr); 1857d24d4204SJose E. Roman ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1858d24d4204SJose E. Roman if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1859d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1860d24d4204SJose E. Roman m = PETSC_DECIDE; 1861d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1862d24d4204SJose E. Roman n = PETSC_DECIDE; 1863d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1864d24d4204SJose E. Roman ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1865d24d4204SJose E. Roman a = (Mat_ScaLAPACK*)(*A)->data; 1866d24d4204SJose E. Roman ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1867d24d4204SJose E. Roman ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1868d24d4204SJose E. Roman ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1869d24d4204SJose E. Roman ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1870d24d4204SJose E. Roman ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1871d24d4204SJose E. Roman ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1872d24d4204SJose E. Roman ierr = MatSetUp(*A);CHKERRQ(ierr); 1873d24d4204SJose E. Roman PetscFunctionReturn(0); 1874d24d4204SJose E. Roman } 1875d24d4204SJose E. Roman 1876