1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2d24d4204SJose E. Roman 3d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 4d24d4204SJose E. Roman 5d24d4204SJose E. Roman /* 6d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 7d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 8d24d4204SJose E. Roman */ 9d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 10d24d4204SJose E. Roman 11d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 12d24d4204SJose E. Roman { 13d24d4204SJose E. Roman PetscErrorCode ierr; 14d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 15d24d4204SJose E. Roman PetscBool iascii; 16d24d4204SJose E. Roman PetscViewerFormat format; 17d24d4204SJose E. Roman Mat Adense; 18d24d4204SJose E. Roman 19d24d4204SJose E. Roman PetscFunctionBegin; 20d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 21d24d4204SJose E. Roman if (iascii) { 22d24d4204SJose E. Roman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 23d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 24d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 25d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 26d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 27d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 28d24d4204SJose E. Roman PetscFunctionReturn(0); 29d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 30d24d4204SJose E. Roman PetscFunctionReturn(0); 31d24d4204SJose E. Roman } 32d24d4204SJose E. Roman } 33d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 34d24d4204SJose E. Roman ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 35d24d4204SJose E. Roman ierr = MatView(Adense,viewer);CHKERRQ(ierr); 36d24d4204SJose E. Roman ierr = MatDestroy(&Adense);CHKERRQ(ierr); 37d24d4204SJose E. Roman PetscFunctionReturn(0); 38d24d4204SJose E. Roman } 39d24d4204SJose E. Roman 40d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 41d24d4204SJose E. Roman { 42d24d4204SJose E. Roman PetscErrorCode ierr; 43d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 44d24d4204SJose E. Roman PetscLogDouble isend[2],irecv[2]; 45d24d4204SJose E. Roman 46d24d4204SJose E. Roman PetscFunctionBegin; 47d24d4204SJose E. Roman info->block_size = 1.0; 48d24d4204SJose E. Roman 49d24d4204SJose E. Roman isend[0] = a->lld*a->locc; /* locally allocated */ 50d24d4204SJose E. Roman isend[1] = a->locr*a->locc; /* used submatrix */ 51d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 52d24d4204SJose E. Roman info->nz_allocated = isend[0]; 53d24d4204SJose E. Roman info->nz_used = isend[1]; 54d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 55d24d4204SJose E. Roman ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 56d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 57d24d4204SJose E. Roman info->nz_used = irecv[1]; 58d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 59d24d4204SJose E. Roman ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 60d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 61d24d4204SJose E. Roman info->nz_used = irecv[1]; 62d24d4204SJose E. Roman } 63d24d4204SJose E. Roman 64d24d4204SJose E. Roman info->nz_unneeded = 0; 65d24d4204SJose E. Roman info->assemblies = A->num_ass; 66d24d4204SJose E. Roman info->mallocs = 0; 67d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 68d24d4204SJose E. Roman info->fill_ratio_given = 0; 69d24d4204SJose E. Roman info->fill_ratio_needed = 0; 70d24d4204SJose E. Roman info->factor_mallocs = 0; 71d24d4204SJose E. Roman PetscFunctionReturn(0); 72d24d4204SJose E. Roman } 73d24d4204SJose E. Roman 74d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 75d24d4204SJose E. Roman { 76d24d4204SJose E. Roman PetscFunctionBegin; 77d24d4204SJose E. Roman switch (op) { 78d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 79d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 80d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 81d24d4204SJose E. Roman case MAT_SYMMETRIC: 82d24d4204SJose E. Roman case MAT_SORTED_FULL: 83d24d4204SJose E. Roman case MAT_HERMITIAN: 84d24d4204SJose E. Roman break; 85d24d4204SJose E. Roman default: 86d24d4204SJose E. Roman SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 87d24d4204SJose E. Roman } 88d24d4204SJose E. Roman PetscFunctionReturn(0); 89d24d4204SJose E. Roman } 90d24d4204SJose E. Roman 91d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 92d24d4204SJose E. Roman { 93d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 94d24d4204SJose E. Roman PetscErrorCode ierr; 95d24d4204SJose E. Roman PetscInt i,j; 96d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 97d24d4204SJose E. Roman 98d24d4204SJose E. Roman PetscFunctionBegin; 99d24d4204SJose E. Roman for (i=0;i<nr;i++) { 100d24d4204SJose E. Roman if (rows[i] < 0) continue; 101d24d4204SJose E. Roman ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 102d24d4204SJose E. Roman for (j=0;j<nc;j++) { 103d24d4204SJose E. Roman if (cols[j] < 0) continue; 104d24d4204SJose E. Roman ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 105d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 106d24d4204SJose E. Roman if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 107d24d4204SJose E. Roman switch (imode) { 108d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 109d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 110d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 111d24d4204SJose E. Roman } 112d24d4204SJose E. Roman } else { 113d24d4204SJose E. Roman if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 114d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 115d24d4204SJose E. Roman ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 116d24d4204SJose E. Roman } 117d24d4204SJose E. Roman } 118d24d4204SJose E. Roman } 119d24d4204SJose E. Roman PetscFunctionReturn(0); 120d24d4204SJose E. Roman } 121d24d4204SJose E. Roman 122d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 123d24d4204SJose E. Roman { 124d24d4204SJose E. Roman PetscErrorCode ierr; 125d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 126d24d4204SJose E. Roman PetscScalar *x2d,*y2d,alpha=1.0; 127d24d4204SJose E. Roman const PetscInt *ranges; 128d24d4204SJose E. Roman PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 129d24d4204SJose E. Roman 130d24d4204SJose E. Roman PetscFunctionBegin; 131d24d4204SJose E. Roman if (transpose) { 132d24d4204SJose E. Roman 133d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 134d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 135d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 136d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 137d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 138d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 139d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 140d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 141d24d4204SJose E. Roman ylld = 1; 142d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 143d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 144d24d4204SJose E. Roman 145d24d4204SJose E. Roman /* allocate 2d vectors */ 146d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 147d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 148d24d4204SJose E. Roman ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 149d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 150d24d4204SJose E. Roman 151d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 152d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 153d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 154d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 155d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 156d24d4204SJose E. Roman 157d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 158d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 159d24d4204SJose E. Roman 160d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 161d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 162d24d4204SJose E. Roman 163d24d4204SJose E. Roman /* call PBLAS subroutine */ 164d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 165d24d4204SJose E. Roman 166d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 167d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 168d24d4204SJose E. Roman 169d24d4204SJose E. Roman } else { /* non-transpose */ 170d24d4204SJose E. Roman 171d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 172d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 173d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 174d24d4204SJose E. Roman xlld = 1; 175d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 176d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 177d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 178d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 179d24d4204SJose E. Roman ylld = PetscMax(1,A->rmap->n); 180d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 181d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 182d24d4204SJose E. Roman 183d24d4204SJose E. Roman /* allocate 2d vectors */ 184d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 185d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 186d24d4204SJose E. Roman ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 187d24d4204SJose E. Roman ylld = PetscMax(1,lszy); 188d24d4204SJose E. Roman 189d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 190d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 191d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 192d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 193d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 194d24d4204SJose E. Roman 195d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 196d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 197d24d4204SJose E. Roman 198d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 199d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 200d24d4204SJose E. Roman 201d24d4204SJose E. Roman /* call PBLAS subroutine */ 202d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 203d24d4204SJose E. Roman 204d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 205d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 206d24d4204SJose E. Roman 207d24d4204SJose E. Roman } 208d24d4204SJose E. Roman ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 209d24d4204SJose E. Roman PetscFunctionReturn(0); 210d24d4204SJose E. Roman } 211d24d4204SJose E. Roman 212d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 213d24d4204SJose E. Roman { 214d24d4204SJose E. Roman PetscErrorCode ierr; 215d24d4204SJose E. Roman const PetscScalar *xarray; 216d24d4204SJose E. Roman PetscScalar *yarray; 217d24d4204SJose E. Roman 218d24d4204SJose E. Roman PetscFunctionBegin; 219d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 220d24d4204SJose E. Roman ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 221d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 222d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 223d24d4204SJose E. Roman ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 224d24d4204SJose E. Roman PetscFunctionReturn(0); 225d24d4204SJose E. Roman } 226d24d4204SJose E. Roman 227d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 228d24d4204SJose E. Roman { 229d24d4204SJose E. Roman PetscErrorCode ierr; 230d24d4204SJose E. Roman const PetscScalar *xarray; 231d24d4204SJose E. Roman PetscScalar *yarray; 232d24d4204SJose E. Roman 233d24d4204SJose E. Roman PetscFunctionBegin; 234d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 235d24d4204SJose E. Roman ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 236d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 237d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 238d24d4204SJose E. Roman ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 239d24d4204SJose E. Roman PetscFunctionReturn(0); 240d24d4204SJose E. Roman } 241d24d4204SJose E. Roman 242d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 243d24d4204SJose E. Roman { 244d24d4204SJose E. Roman PetscErrorCode ierr; 245d24d4204SJose E. Roman const PetscScalar *xarray; 246d24d4204SJose E. Roman PetscScalar *zarray; 247d24d4204SJose E. Roman 248d24d4204SJose E. Roman PetscFunctionBegin; 249d24d4204SJose E. Roman if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 250d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 251d24d4204SJose E. Roman ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 252d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 253d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 254d24d4204SJose E. Roman ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 255d24d4204SJose E. Roman PetscFunctionReturn(0); 256d24d4204SJose E. Roman } 257d24d4204SJose E. Roman 258d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 259d24d4204SJose E. Roman { 260d24d4204SJose E. Roman PetscErrorCode ierr; 261d24d4204SJose E. Roman const PetscScalar *xarray; 262d24d4204SJose E. Roman PetscScalar *zarray; 263d24d4204SJose E. Roman 264d24d4204SJose E. Roman PetscFunctionBegin; 265d24d4204SJose E. Roman if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 266d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 267d24d4204SJose E. Roman ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 268d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 269d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 270d24d4204SJose E. Roman ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 271d24d4204SJose E. Roman PetscFunctionReturn(0); 272d24d4204SJose E. Roman } 273d24d4204SJose E. Roman 274d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 275d24d4204SJose E. Roman { 276d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 277d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 278d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 279d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 280d24d4204SJose E. Roman PetscBLASInt one=1; 281d24d4204SJose E. Roman 282d24d4204SJose E. Roman PetscFunctionBegin; 283d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 284d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 285d24d4204SJose E. Roman PetscFunctionReturn(0); 286d24d4204SJose E. Roman } 287d24d4204SJose E. Roman 288d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 289d24d4204SJose E. Roman { 290d24d4204SJose E. Roman PetscErrorCode ierr; 291d24d4204SJose E. Roman 292d24d4204SJose E. Roman PetscFunctionBegin; 293d24d4204SJose E. Roman ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 294d24d4204SJose E. Roman ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 295d24d4204SJose E. Roman ierr = MatSetUp(C);CHKERRQ(ierr); 296d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 297d24d4204SJose E. Roman PetscFunctionReturn(0); 298d24d4204SJose E. Roman } 299d24d4204SJose E. Roman 300d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 301d24d4204SJose E. Roman { 302d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 303d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 304d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 305d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 306d24d4204SJose E. Roman PetscBLASInt one=1; 307d24d4204SJose E. Roman 308d24d4204SJose E. Roman PetscFunctionBegin; 309d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 310d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 311d24d4204SJose E. Roman PetscFunctionReturn(0); 312d24d4204SJose E. Roman } 313d24d4204SJose E. Roman 314d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 315d24d4204SJose E. Roman { 316d24d4204SJose E. Roman PetscErrorCode ierr; 317d24d4204SJose E. Roman 318d24d4204SJose E. Roman PetscFunctionBegin; 319d24d4204SJose E. Roman ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 320d24d4204SJose E. Roman ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 321d24d4204SJose E. Roman ierr = MatSetUp(C);CHKERRQ(ierr); 322d24d4204SJose E. Roman PetscFunctionReturn(0); 323d24d4204SJose E. Roman } 324d24d4204SJose E. Roman 325d24d4204SJose E. Roman /* --------------------------------------- */ 326d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 327d24d4204SJose E. Roman { 328d24d4204SJose E. Roman PetscFunctionBegin; 329d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 330d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 331d24d4204SJose E. Roman PetscFunctionReturn(0); 332d24d4204SJose E. Roman } 333d24d4204SJose E. Roman 334d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 335d24d4204SJose E. Roman { 336d24d4204SJose E. Roman PetscFunctionBegin; 337d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 338d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 339d24d4204SJose E. Roman PetscFunctionReturn(0); 340d24d4204SJose E. Roman } 341d24d4204SJose E. Roman 342d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 343d24d4204SJose E. Roman { 344d24d4204SJose E. Roman PetscErrorCode ierr; 345d24d4204SJose E. Roman Mat_Product *product = C->product; 346d24d4204SJose E. Roman 347d24d4204SJose E. Roman PetscFunctionBegin; 348d24d4204SJose E. Roman switch (product->type) { 349d24d4204SJose E. Roman case MATPRODUCT_AB: 350d24d4204SJose E. Roman ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 351d24d4204SJose E. Roman break; 352d24d4204SJose E. Roman case MATPRODUCT_ABt: 353d24d4204SJose E. Roman ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 354d24d4204SJose E. Roman break; 355d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 356d24d4204SJose E. Roman } 357d24d4204SJose E. Roman PetscFunctionReturn(0); 358d24d4204SJose E. Roman } 359d24d4204SJose E. Roman /* --------------------------------------- */ 360d24d4204SJose E. Roman 361d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 362d24d4204SJose E. Roman { 363d24d4204SJose E. Roman PetscErrorCode ierr; 364d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 365d24d4204SJose E. Roman PetscScalar *darray,*d2d,v; 366d24d4204SJose E. Roman const PetscInt *ranges; 367d24d4204SJose E. Roman PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 368d24d4204SJose E. Roman 369d24d4204SJose E. Roman PetscFunctionBegin; 370d24d4204SJose E. Roman ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 371d24d4204SJose E. Roman 372d24d4204SJose E. Roman if (A->rmap->N<=A->cmap->N) { /* row version */ 373d24d4204SJose E. Roman 374d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 375d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 376d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 377d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 378d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 379d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 380d24d4204SJose E. Roman 381d24d4204SJose E. Roman /* allocate 2d vector */ 382d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 383d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 384d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 385d24d4204SJose E. Roman 386d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 387d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 388d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 389d24d4204SJose E. Roman 390d24d4204SJose E. Roman /* collect diagonal */ 391d24d4204SJose E. Roman for (j=1;j<=a->M;j++) { 392d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 393d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 394d24d4204SJose E. Roman } 395d24d4204SJose E. Roman 396d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 397d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 398d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 399d24d4204SJose E. Roman 400d24d4204SJose E. Roman } else { /* column version */ 401d24d4204SJose E. Roman 402d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 403d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 404d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 405d24d4204SJose E. Roman dlld = 1; 406d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 407d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 408d24d4204SJose E. Roman 409d24d4204SJose E. Roman /* allocate 2d vector */ 410d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 411d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 412d24d4204SJose E. Roman 413d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 414d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 415d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 416d24d4204SJose E. Roman 417d24d4204SJose E. Roman /* collect diagonal */ 418d24d4204SJose E. Roman for (j=1;j<=a->N;j++) { 419d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 420d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 421d24d4204SJose E. Roman } 422d24d4204SJose E. Roman 423d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 424d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 425d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 426d24d4204SJose E. Roman } 427d24d4204SJose E. Roman 428d24d4204SJose E. Roman ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 429d24d4204SJose E. Roman ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 430d24d4204SJose E. Roman ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 431d24d4204SJose E. Roman PetscFunctionReturn(0); 432d24d4204SJose E. Roman } 433d24d4204SJose E. Roman 434d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 435d24d4204SJose E. Roman { 436d24d4204SJose E. Roman PetscErrorCode ierr; 437d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 438d24d4204SJose E. Roman const PetscScalar *d; 439d24d4204SJose E. Roman const PetscInt *ranges; 440d24d4204SJose E. Roman PetscScalar *d2d; 441d24d4204SJose E. Roman PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 442d24d4204SJose E. Roman 443d24d4204SJose E. Roman PetscFunctionBegin; 444d24d4204SJose E. Roman if (R) { 445d24d4204SJose E. Roman ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 446d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 447d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 448d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 449d24d4204SJose E. Roman dlld = 1; 450d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 451d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 452d24d4204SJose E. Roman 453d24d4204SJose E. Roman /* allocate 2d vector */ 454d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 455d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 456d24d4204SJose E. Roman 457d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 458d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 459d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 460d24d4204SJose E. Roman 461d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 462d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 463d24d4204SJose E. Roman 464d24d4204SJose E. Roman /* broadcast along process columns */ 465d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 466d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 467d24d4204SJose E. Roman 468d24d4204SJose E. Roman /* local scaling */ 469d24d4204SJose E. Roman for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 470d24d4204SJose E. Roman 471d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 472d24d4204SJose E. Roman ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 473d24d4204SJose E. Roman } 474d24d4204SJose E. Roman if (L) { 475d24d4204SJose E. Roman ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 476d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 477d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 478d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 479d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 480d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 481d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 482d24d4204SJose E. Roman 483d24d4204SJose E. Roman /* allocate 2d vector */ 484d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 485d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 486d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 487d24d4204SJose E. Roman 488d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 489d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 490d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 491d24d4204SJose E. Roman 492d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 493d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 494d24d4204SJose E. Roman 495d24d4204SJose E. Roman /* broadcast along process rows */ 496d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 497d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 498d24d4204SJose E. Roman 499d24d4204SJose E. Roman /* local scaling */ 500d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 501d24d4204SJose E. Roman 502d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 503d24d4204SJose E. Roman ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 504d24d4204SJose E. Roman } 505d24d4204SJose E. Roman PetscFunctionReturn(0); 506d24d4204SJose E. Roman } 507d24d4204SJose E. Roman 508d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 509d24d4204SJose E. Roman { 510d24d4204SJose E. Roman PetscFunctionBegin; 511d24d4204SJose E. Roman *missing = PETSC_FALSE; 512d24d4204SJose E. Roman PetscFunctionReturn(0); 513d24d4204SJose E. Roman } 514d24d4204SJose E. Roman 515d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 516d24d4204SJose E. Roman { 517d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 518d24d4204SJose E. Roman PetscBLASInt n,one=1; 519d24d4204SJose E. Roman 520d24d4204SJose E. Roman PetscFunctionBegin; 521d24d4204SJose E. Roman n = x->lld*x->locc; 522d24d4204SJose E. Roman PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 523d24d4204SJose E. Roman PetscFunctionReturn(0); 524d24d4204SJose E. Roman } 525d24d4204SJose E. Roman 526d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 527d24d4204SJose E. Roman { 528d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 529d24d4204SJose E. Roman PetscBLASInt i,n; 530d24d4204SJose E. Roman PetscScalar v; 531d24d4204SJose E. Roman 532d24d4204SJose E. Roman PetscFunctionBegin; 533d24d4204SJose E. Roman n = PetscMin(x->M,x->N); 534d24d4204SJose E. Roman for (i=1;i<=n;i++) { 535d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 536d24d4204SJose E. Roman v += alpha; 537d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 538d24d4204SJose E. Roman } 539d24d4204SJose E. Roman PetscFunctionReturn(0); 540d24d4204SJose E. Roman } 541d24d4204SJose E. Roman 542d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 543d24d4204SJose E. Roman { 544d24d4204SJose E. Roman PetscErrorCode ierr; 545d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 546d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 547d24d4204SJose E. Roman PetscBLASInt one=1; 548d24d4204SJose E. Roman PetscScalar beta=1.0; 549d24d4204SJose E. Roman 550d24d4204SJose E. Roman PetscFunctionBegin; 551d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y,1,X,3); 552d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 553d24d4204SJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 554d24d4204SJose E. Roman PetscFunctionReturn(0); 555d24d4204SJose E. Roman } 556d24d4204SJose E. Roman 557d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 558d24d4204SJose E. Roman { 559d24d4204SJose E. Roman PetscErrorCode ierr; 560d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 561d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 562d24d4204SJose E. Roman 563d24d4204SJose E. Roman PetscFunctionBegin; 564d24d4204SJose E. Roman ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 565d24d4204SJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 566d24d4204SJose E. Roman PetscFunctionReturn(0); 567d24d4204SJose E. Roman } 568d24d4204SJose E. Roman 569d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 570d24d4204SJose E. Roman { 571d24d4204SJose E. Roman Mat Bs; 572d24d4204SJose E. Roman MPI_Comm comm; 573d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 574d24d4204SJose E. Roman PetscErrorCode ierr; 575d24d4204SJose E. Roman 576d24d4204SJose E. Roman PetscFunctionBegin; 577d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 578d24d4204SJose E. Roman ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 579d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 580d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 581d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 582d24d4204SJose E. Roman b->M = a->M; 583d24d4204SJose E. Roman b->N = a->N; 584d24d4204SJose E. Roman b->mb = a->mb; 585d24d4204SJose E. Roman b->nb = a->nb; 586d24d4204SJose E. Roman b->rsrc = a->rsrc; 587d24d4204SJose E. Roman b->csrc = a->csrc; 588d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 589d24d4204SJose E. Roman *B = Bs; 590d24d4204SJose E. Roman if (op == MAT_COPY_VALUES) { 591d24d4204SJose E. Roman ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 592d24d4204SJose E. Roman } 593d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 594d24d4204SJose E. Roman PetscFunctionReturn(0); 595d24d4204SJose E. Roman } 596d24d4204SJose E. Roman 597d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 598d24d4204SJose E. Roman { 599d24d4204SJose E. Roman PetscErrorCode ierr; 600d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601d24d4204SJose E. Roman Mat Bs = *B; 602d24d4204SJose E. Roman PetscBLASInt one=1; 603d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 604d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 605d24d4204SJose E. Roman PetscInt i,j; 606d24d4204SJose E. Roman #endif 607d24d4204SJose E. Roman 608d24d4204SJose E. Roman PetscFunctionBegin; 609d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 610d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 611d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 612d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 613d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 614d24d4204SJose E. Roman *B = Bs; 615d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 616d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 617d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 618d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 619d24d4204SJose E. Roman /* undo conjugation */ 620d24d4204SJose E. Roman for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 621d24d4204SJose E. Roman #endif 622d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 623d24d4204SJose E. Roman PetscFunctionReturn(0); 624d24d4204SJose E. Roman } 625d24d4204SJose E. Roman 626d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 627d24d4204SJose E. Roman { 628d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 629d24d4204SJose E. Roman PetscInt i,j; 630d24d4204SJose E. Roman 631d24d4204SJose E. Roman PetscFunctionBegin; 632d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 633d24d4204SJose E. Roman PetscFunctionReturn(0); 634d24d4204SJose E. Roman } 635d24d4204SJose E. Roman 636d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 637d24d4204SJose E. Roman { 638d24d4204SJose E. Roman PetscErrorCode ierr; 639d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 640d24d4204SJose E. Roman Mat Bs = *B; 641d24d4204SJose E. Roman PetscBLASInt one=1; 642d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 643d24d4204SJose E. Roman 644d24d4204SJose E. Roman PetscFunctionBegin; 645d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 646d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 647d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 648d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 649d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 650d24d4204SJose E. Roman *B = Bs; 651d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 652d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 653d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 654d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 655d24d4204SJose E. Roman PetscFunctionReturn(0); 656d24d4204SJose E. Roman } 657d24d4204SJose E. Roman 658d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 659d24d4204SJose E. Roman { 660d24d4204SJose E. Roman PetscErrorCode ierr; 661d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 662d24d4204SJose E. Roman PetscScalar *x,*x2d; 663d24d4204SJose E. Roman const PetscInt *ranges; 664d24d4204SJose E. Roman PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 665d24d4204SJose E. Roman 666d24d4204SJose E. Roman PetscFunctionBegin; 667d24d4204SJose E. Roman ierr = VecCopy(B,X);CHKERRQ(ierr); 668d24d4204SJose E. Roman ierr = VecGetArray(X,&x);CHKERRQ(ierr); 669d24d4204SJose E. Roman 670d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 671d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 672d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 673d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 674d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 675d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 676d24d4204SJose E. Roman 677d24d4204SJose E. Roman /* allocate 2d vector */ 678d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 679d24d4204SJose E. Roman ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 680d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 681d24d4204SJose E. Roman 682d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 683d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 684d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 685d24d4204SJose E. Roman 686d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 687d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 688d24d4204SJose E. Roman 689d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 690d24d4204SJose E. Roman switch (A->factortype) { 691d24d4204SJose E. Roman case MAT_FACTOR_LU: 692d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 693d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 694d24d4204SJose E. Roman break; 695d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 696d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 697d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 698d24d4204SJose E. Roman break; 699d24d4204SJose E. Roman default: 700d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 701d24d4204SJose E. Roman break; 702d24d4204SJose E. Roman } 703d24d4204SJose E. Roman 704d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 705d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 706d24d4204SJose E. Roman 707d24d4204SJose E. Roman ierr = PetscFree(x2d);CHKERRQ(ierr); 708d24d4204SJose E. Roman ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 709d24d4204SJose E. Roman PetscFunctionReturn(0); 710d24d4204SJose E. Roman } 711d24d4204SJose E. Roman 712d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 713d24d4204SJose E. Roman { 714d24d4204SJose E. Roman PetscErrorCode ierr; 715d24d4204SJose E. Roman 716d24d4204SJose E. Roman PetscFunctionBegin; 717d24d4204SJose E. Roman ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 718d24d4204SJose E. Roman ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 719d24d4204SJose E. Roman PetscFunctionReturn(0); 720d24d4204SJose E. Roman } 721d24d4204SJose E. Roman 722d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 723d24d4204SJose E. Roman { 724d24d4204SJose E. Roman PetscErrorCode ierr; 725d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 726d24d4204SJose E. Roman PetscBool flg1,flg2; 727d24d4204SJose E. Roman PetscBLASInt one=1,info; 728d24d4204SJose E. Roman 729d24d4204SJose E. Roman PetscFunctionBegin; 730d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 731d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 732d24d4204SJose E. Roman if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 733d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B,1,X,2); 734d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)B->data; 735d24d4204SJose E. Roman x = (Mat_ScaLAPACK*)X->data; 736d24d4204SJose E. Roman ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 737d24d4204SJose E. Roman 738d24d4204SJose E. Roman switch (A->factortype) { 739d24d4204SJose E. Roman case MAT_FACTOR_LU: 740d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 741d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 742d24d4204SJose E. Roman break; 743d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 744d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 745d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 746d24d4204SJose E. Roman break; 747d24d4204SJose E. Roman default: 748d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 749d24d4204SJose E. Roman break; 750d24d4204SJose E. Roman } 751d24d4204SJose E. Roman PetscFunctionReturn(0); 752d24d4204SJose E. Roman } 753d24d4204SJose E. Roman 754d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 755d24d4204SJose E. Roman { 756d24d4204SJose E. Roman PetscErrorCode ierr; 757d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 758d24d4204SJose E. Roman PetscBLASInt one=1,info; 759d24d4204SJose E. Roman 760d24d4204SJose E. Roman PetscFunctionBegin; 761d24d4204SJose E. Roman if (!a->pivots) { 762d24d4204SJose E. Roman ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 763d24d4204SJose E. Roman ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 764d24d4204SJose E. Roman } 765d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 766d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf",info); 767d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 768d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 769d24d4204SJose E. Roman 770d24d4204SJose E. Roman ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 771d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 772d24d4204SJose E. Roman PetscFunctionReturn(0); 773d24d4204SJose E. Roman } 774d24d4204SJose E. Roman 775d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 776d24d4204SJose E. Roman { 777d24d4204SJose E. Roman PetscErrorCode ierr; 778d24d4204SJose E. Roman 779d24d4204SJose E. Roman PetscFunctionBegin; 780d24d4204SJose E. Roman ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 781d24d4204SJose E. Roman ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 782d24d4204SJose E. Roman PetscFunctionReturn(0); 783d24d4204SJose E. Roman } 784d24d4204SJose E. Roman 785d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 786d24d4204SJose E. Roman { 787d24d4204SJose E. Roman PetscFunctionBegin; 788d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 789d24d4204SJose E. Roman PetscFunctionReturn(0); 790d24d4204SJose E. Roman } 791d24d4204SJose E. Roman 792d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 793d24d4204SJose E. Roman { 794d24d4204SJose E. Roman PetscErrorCode ierr; 795d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 796d24d4204SJose E. Roman PetscBLASInt one=1,info; 797d24d4204SJose E. Roman 798d24d4204SJose E. Roman PetscFunctionBegin; 799d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 800d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf",info); 801d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 802d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 803d24d4204SJose E. Roman 804d24d4204SJose E. Roman ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 805d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 806d24d4204SJose E. Roman PetscFunctionReturn(0); 807d24d4204SJose E. Roman } 808d24d4204SJose E. Roman 809d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 810d24d4204SJose E. Roman { 811d24d4204SJose E. Roman PetscErrorCode ierr; 812d24d4204SJose E. Roman 813d24d4204SJose E. Roman PetscFunctionBegin; 814d24d4204SJose E. Roman ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 815d24d4204SJose E. Roman ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 816d24d4204SJose E. Roman PetscFunctionReturn(0); 817d24d4204SJose E. Roman } 818d24d4204SJose E. Roman 819d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 820d24d4204SJose E. Roman { 821d24d4204SJose E. Roman PetscFunctionBegin; 822d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 823d24d4204SJose E. Roman PetscFunctionReturn(0); 824d24d4204SJose E. Roman } 825d24d4204SJose E. Roman 826d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 827d24d4204SJose E. Roman { 828d24d4204SJose E. Roman PetscFunctionBegin; 829d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 830d24d4204SJose E. Roman PetscFunctionReturn(0); 831d24d4204SJose E. Roman } 832d24d4204SJose E. Roman 833d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 834d24d4204SJose E. Roman { 835d24d4204SJose E. Roman Mat B; 836d24d4204SJose E. Roman PetscErrorCode ierr; 837d24d4204SJose E. Roman 838d24d4204SJose E. Roman PetscFunctionBegin; 839d24d4204SJose E. Roman /* Create the factorization matrix */ 840d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 841d24d4204SJose E. Roman ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 842d24d4204SJose E. Roman ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr); 843d24d4204SJose E. Roman ierr = MatSetUp(B);CHKERRQ(ierr); 844d24d4204SJose E. Roman B->factortype = ftype; 845d24d4204SJose E. Roman ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 846d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 847d24d4204SJose E. Roman 848d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 849d24d4204SJose E. Roman *F = B; 850d24d4204SJose E. Roman PetscFunctionReturn(0); 851d24d4204SJose E. Roman } 852d24d4204SJose E. Roman 853d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 854d24d4204SJose E. Roman { 855d24d4204SJose E. Roman PetscErrorCode ierr; 856d24d4204SJose E. Roman 857d24d4204SJose E. Roman PetscFunctionBegin; 858d24d4204SJose E. Roman ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 859d24d4204SJose E. Roman ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 860d24d4204SJose E. Roman PetscFunctionReturn(0); 861d24d4204SJose E. Roman } 862d24d4204SJose E. Roman 863d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 864d24d4204SJose E. Roman { 865d24d4204SJose E. Roman PetscErrorCode ierr; 866d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 867d24d4204SJose E. Roman PetscBLASInt one=1,lwork=0; 868d24d4204SJose E. Roman const char *ntype; 869d24d4204SJose E. Roman PetscScalar *work,dummy; 870d24d4204SJose E. Roman 871d24d4204SJose E. Roman PetscFunctionBegin; 872d24d4204SJose E. Roman switch (type){ 873d24d4204SJose E. Roman case NORM_1: 874d24d4204SJose E. Roman ntype = "1"; 875d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 876d24d4204SJose E. Roman break; 877d24d4204SJose E. Roman case NORM_FROBENIUS: 878d24d4204SJose E. Roman ntype = "F"; 879d24d4204SJose E. Roman work = &dummy; 880d24d4204SJose E. Roman break; 881d24d4204SJose E. Roman case NORM_INFINITY: 882d24d4204SJose E. Roman ntype = "I"; 883d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 884d24d4204SJose E. Roman break; 885d24d4204SJose E. Roman default: 886d24d4204SJose E. Roman SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 887d24d4204SJose E. Roman } 888d24d4204SJose E. Roman if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 889d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 890d24d4204SJose E. Roman if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 891d24d4204SJose E. Roman PetscFunctionReturn(0); 892d24d4204SJose E. Roman } 893d24d4204SJose E. Roman 894d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 895d24d4204SJose E. Roman { 896d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 897d24d4204SJose E. Roman PetscErrorCode ierr; 898d24d4204SJose E. Roman 899d24d4204SJose E. Roman PetscFunctionBegin; 900d24d4204SJose E. Roman ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 901d24d4204SJose E. Roman PetscFunctionReturn(0); 902d24d4204SJose E. Roman } 903d24d4204SJose E. Roman 904d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 905d24d4204SJose E. Roman { 906d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 907d24d4204SJose E. Roman PetscErrorCode ierr; 908d24d4204SJose E. Roman PetscInt i,n,nb,isrc,nproc,iproc,*idx; 909d24d4204SJose E. Roman 910d24d4204SJose E. Roman PetscFunctionBegin; 911d24d4204SJose E. Roman if (rows) { 912d24d4204SJose E. Roman n = a->locr; 913d24d4204SJose E. Roman nb = a->mb; 914d24d4204SJose E. Roman isrc = a->rsrc; 915d24d4204SJose E. Roman nproc = a->grid->nprow; 916d24d4204SJose E. Roman iproc = a->grid->myrow; 917d24d4204SJose E. Roman ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 918d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 919d24d4204SJose E. Roman ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 920d24d4204SJose E. Roman } 921d24d4204SJose E. Roman if (cols) { 922d24d4204SJose E. Roman n = a->locc; 923d24d4204SJose E. Roman nb = a->nb; 924d24d4204SJose E. Roman isrc = a->csrc; 925d24d4204SJose E. Roman nproc = a->grid->npcol; 926d24d4204SJose E. Roman iproc = a->grid->mycol; 927d24d4204SJose E. Roman ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 928d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 929d24d4204SJose E. Roman ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 930d24d4204SJose E. Roman } 931d24d4204SJose E. Roman PetscFunctionReturn(0); 932d24d4204SJose E. Roman } 933d24d4204SJose E. Roman 934d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 935d24d4204SJose E. Roman { 936d24d4204SJose E. Roman PetscErrorCode ierr; 937d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 938d24d4204SJose E. Roman Mat Bmpi; 939d24d4204SJose E. Roman MPI_Comm comm; 940d24d4204SJose E. Roman const PetscInt *ranges; 941d24d4204SJose E. Roman PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 942d24d4204SJose E. Roman PetscScalar *barray; 943d24d4204SJose E. Roman 944d24d4204SJose E. Roman PetscFunctionBegin; 945d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 946d24d4204SJose E. Roman 947d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 948d24d4204SJose E. Roman else { 949d24d4204SJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 950d24d4204SJose E. Roman ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 951d24d4204SJose E. Roman ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 952d24d4204SJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 953d24d4204SJose E. Roman } 954d24d4204SJose E. Roman 955d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 956d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 957d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 958d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 959d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 960d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 961d24d4204SJose E. Roman 962d24d4204SJose E. Roman /* redistribute matrix */ 963d24d4204SJose E. Roman ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 964d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 965d24d4204SJose E. Roman ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 966d24d4204SJose E. Roman 967d24d4204SJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968d24d4204SJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 969d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 970d24d4204SJose E. Roman ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 971d24d4204SJose E. Roman } else *B = Bmpi; 972d24d4204SJose E. Roman PetscFunctionReturn(0); 973d24d4204SJose E. Roman } 974d24d4204SJose E. Roman 975d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 976d24d4204SJose E. Roman { 977d24d4204SJose E. Roman PetscErrorCode ierr; 978d24d4204SJose E. Roman Mat_ScaLAPACK *b; 979d24d4204SJose E. Roman Mat Bmpi; 980d24d4204SJose E. Roman MPI_Comm comm; 981d24d4204SJose E. Roman const PetscInt *ranges; 982d24d4204SJose E. Roman PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 983d24d4204SJose E. Roman PetscScalar *aarray; 984*4e8b52a1SJose E. Roman PetscInt lda; 985d24d4204SJose E. Roman 986d24d4204SJose E. Roman PetscFunctionBegin; 987d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 988d24d4204SJose E. Roman 989d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 990d24d4204SJose E. Roman else { 991d24d4204SJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 992d24d4204SJose E. Roman ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 993d24d4204SJose E. Roman ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 994d24d4204SJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 995d24d4204SJose E. Roman } 996d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bmpi->data; 997d24d4204SJose E. Roman 998d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 999d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1000d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1001*4e8b52a1SJose E. Roman ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1002*4e8b52a1SJose E. Roman lld = PetscMax(lda,1); /* local leading dimension */ 1003d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1004d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1005d24d4204SJose E. Roman 1006d24d4204SJose E. Roman /* redistribute matrix */ 1007d24d4204SJose E. Roman ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1008d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1009d24d4204SJose E. Roman ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1010d24d4204SJose E. Roman 1011d24d4204SJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1012d24d4204SJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1013d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 1014d24d4204SJose E. Roman ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1015d24d4204SJose E. Roman } else *B = Bmpi; 1016d24d4204SJose E. Roman PetscFunctionReturn(0); 1017d24d4204SJose E. Roman } 1018d24d4204SJose E. Roman 1019d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1020d24d4204SJose E. Roman { 1021d24d4204SJose E. Roman Mat mat_scal; 1022d24d4204SJose E. Roman PetscErrorCode ierr; 1023d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1024d24d4204SJose E. Roman const PetscInt *cols; 1025d24d4204SJose E. Roman const PetscScalar *vals; 1026d24d4204SJose E. Roman 1027d24d4204SJose E. Roman PetscFunctionBegin; 1028d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1029d24d4204SJose E. Roman mat_scal = *newmat; 1030d24d4204SJose E. Roman ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1031d24d4204SJose E. Roman } else { 1032d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1033d24d4204SJose E. Roman m = PETSC_DECIDE; 1034d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1035d24d4204SJose E. Roman n = PETSC_DECIDE; 1036d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1037d24d4204SJose E. Roman ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1038d24d4204SJose E. Roman ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1039d24d4204SJose E. Roman ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1040d24d4204SJose E. Roman } 1041d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 1042d24d4204SJose E. Roman ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1043d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1044d24d4204SJose E. Roman ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1045d24d4204SJose E. Roman } 1046d24d4204SJose E. Roman ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047d24d4204SJose E. Roman ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1048d24d4204SJose E. Roman 1049d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1050d24d4204SJose E. Roman else *newmat = mat_scal; 1051d24d4204SJose E. Roman PetscFunctionReturn(0); 1052d24d4204SJose E. Roman } 1053d24d4204SJose E. Roman 1054d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1055d24d4204SJose E. Roman { 1056d24d4204SJose E. Roman Mat mat_scal; 1057d24d4204SJose E. Roman PetscErrorCode ierr; 1058d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1059d24d4204SJose E. Roman const PetscInt *cols; 1060d24d4204SJose E. Roman const PetscScalar *vals; 1061d24d4204SJose E. Roman PetscScalar v; 1062d24d4204SJose E. Roman 1063d24d4204SJose E. Roman PetscFunctionBegin; 1064d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1065d24d4204SJose E. Roman mat_scal = *newmat; 1066d24d4204SJose E. Roman ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1067d24d4204SJose E. Roman } else { 1068d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1069d24d4204SJose E. Roman m = PETSC_DECIDE; 1070d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1071d24d4204SJose E. Roman n = PETSC_DECIDE; 1072d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1073d24d4204SJose E. Roman ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1074d24d4204SJose E. Roman ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1075d24d4204SJose E. Roman ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1076d24d4204SJose E. Roman } 1077d24d4204SJose E. Roman ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1078d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 1079d24d4204SJose E. Roman ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1080d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1081d24d4204SJose E. Roman for (j=0;j<ncols;j++) { /* lower triangular part */ 1082d24d4204SJose E. Roman if (cols[j] == row) continue; 1083d24d4204SJose E. Roman v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1084d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1085d24d4204SJose E. Roman } 1086d24d4204SJose E. Roman ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1087d24d4204SJose E. Roman } 1088d24d4204SJose E. Roman ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1089d24d4204SJose E. Roman ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1090d24d4204SJose E. Roman ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1091d24d4204SJose E. Roman 1092d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1093d24d4204SJose E. Roman else *newmat = mat_scal; 1094d24d4204SJose E. Roman PetscFunctionReturn(0); 1095d24d4204SJose E. Roman } 1096d24d4204SJose E. Roman 1097d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1098d24d4204SJose E. Roman { 1099d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1100d24d4204SJose E. Roman PetscErrorCode ierr; 1101d24d4204SJose E. Roman PetscInt sz=0; 1102d24d4204SJose E. Roman 1103d24d4204SJose E. Roman PetscFunctionBegin; 1104d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1105d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1106d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1107d24d4204SJose E. Roman 1108d24d4204SJose E. Roman ierr = PetscFree(a->loc);CHKERRQ(ierr); 1109d24d4204SJose E. Roman ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1110d24d4204SJose E. Roman ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1111d24d4204SJose E. Roman ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1112d24d4204SJose E. Roman 1113d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1114d24d4204SJose E. Roman PetscFunctionReturn(0); 1115d24d4204SJose E. Roman } 1116d24d4204SJose E. Roman 1117d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1118d24d4204SJose E. Roman { 1119d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1120d24d4204SJose E. Roman PetscErrorCode ierr; 1121d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1122d24d4204SJose E. Roman PetscBool flg; 1123d24d4204SJose E. Roman MPI_Comm icomm; 1124d24d4204SJose E. Roman 1125d24d4204SJose E. Roman PetscFunctionBegin; 1126d24d4204SJose E. Roman ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1127d24d4204SJose E. Roman ierr = PetscFree(a->loc);CHKERRQ(ierr); 1128d24d4204SJose E. Roman ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1129d24d4204SJose E. Roman ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1130d24d4204SJose E. Roman ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1131d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1132d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1133d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1134d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 1135d24d4204SJose E. Roman ierr = PetscFree(grid);CHKERRQ(ierr); 1136d24d4204SJose E. Roman ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr); 1137d24d4204SJose E. Roman } 1138d24d4204SJose E. Roman ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1139d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1140d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1141d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1142d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1143d24d4204SJose E. Roman ierr = PetscFree(A->data);CHKERRQ(ierr); 1144d24d4204SJose E. Roman PetscFunctionReturn(0); 1145d24d4204SJose E. Roman } 1146d24d4204SJose E. Roman 1147d24d4204SJose E. Roman PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1148d24d4204SJose E. Roman { 1149d24d4204SJose E. Roman PetscErrorCode ierr; 1150d24d4204SJose E. Roman const PetscInt *ranges; 1151d24d4204SJose E. Roman PetscMPIInt size; 1152d24d4204SJose E. Roman PetscInt i,n; 1153d24d4204SJose E. Roman 1154d24d4204SJose E. Roman PetscFunctionBegin; 1155d24d4204SJose E. Roman ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr); 1156d24d4204SJose E. Roman if (size>2) { 1157d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1158d24d4204SJose E. Roman n = ranges[1]-ranges[0]; 1159d24d4204SJose E. Roman for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1160d24d4204SJose E. Roman if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1161d24d4204SJose E. Roman } 1162d24d4204SJose E. Roman PetscFunctionReturn(0); 1163d24d4204SJose E. Roman } 1164d24d4204SJose E. Roman 1165d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1166d24d4204SJose E. Roman { 1167d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1168d24d4204SJose E. Roman PetscErrorCode ierr; 1169d24d4204SJose E. Roman PetscBLASInt info=0; 1170d24d4204SJose E. Roman 1171d24d4204SJose E. Roman PetscFunctionBegin; 1172d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1173d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1174d24d4204SJose E. Roman 1175d24d4204SJose E. Roman /* check that the layout is as enforced by MatCreateScaLAPACK */ 1176d24d4204SJose E. Roman ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1177d24d4204SJose E. Roman ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1178d24d4204SJose E. Roman 1179d24d4204SJose E. Roman /* compute local sizes */ 1180d24d4204SJose E. Roman ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1181d24d4204SJose E. Roman ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1182d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1183d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1184d24d4204SJose E. Roman a->lld = PetscMax(1,a->locr); 1185d24d4204SJose E. Roman 1186d24d4204SJose E. Roman /* allocate local array */ 1187d24d4204SJose E. Roman ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1188d24d4204SJose E. Roman 1189d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1190d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1191d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1192d24d4204SJose E. Roman PetscFunctionReturn(0); 1193d24d4204SJose E. Roman } 1194d24d4204SJose E. Roman 1195d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1196d24d4204SJose E. Roman { 1197d24d4204SJose E. Roman PetscErrorCode ierr; 1198d24d4204SJose E. Roman PetscInt nstash,reallocs; 1199d24d4204SJose E. Roman 1200d24d4204SJose E. Roman PetscFunctionBegin; 1201d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1202d24d4204SJose E. Roman ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1203d24d4204SJose E. Roman ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1204d24d4204SJose E. Roman ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1205d24d4204SJose E. Roman PetscFunctionReturn(0); 1206d24d4204SJose E. Roman } 1207d24d4204SJose E. Roman 1208d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1209d24d4204SJose E. Roman { 1210d24d4204SJose E. Roman PetscErrorCode ierr; 1211d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1212d24d4204SJose E. Roman PetscMPIInt n; 1213d24d4204SJose E. Roman PetscInt i,flg,*row,*col; 1214d24d4204SJose E. Roman PetscScalar *val; 1215d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1216d24d4204SJose E. Roman 1217d24d4204SJose E. Roman PetscFunctionBegin; 1218d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1219d24d4204SJose E. Roman while (1) { 1220d24d4204SJose E. Roman ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1221d24d4204SJose E. Roman if (!flg) break; 1222d24d4204SJose E. Roman for (i=0;i<n;i++) { 1223d24d4204SJose E. Roman ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1224d24d4204SJose E. Roman ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1225d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1226d24d4204SJose E. Roman if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process"); 1227d24d4204SJose E. Roman switch (A->insertmode) { 1228d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1229d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1230d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1231d24d4204SJose E. Roman } 1232d24d4204SJose E. Roman } 1233d24d4204SJose E. Roman } 1234d24d4204SJose E. Roman ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1235d24d4204SJose E. Roman PetscFunctionReturn(0); 1236d24d4204SJose E. Roman } 1237d24d4204SJose E. Roman 1238d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1239d24d4204SJose E. Roman { 1240d24d4204SJose E. Roman PetscErrorCode ierr; 1241d24d4204SJose E. Roman Mat Adense,As; 1242d24d4204SJose E. Roman MPI_Comm comm; 1243d24d4204SJose E. Roman 1244d24d4204SJose E. Roman PetscFunctionBegin; 1245d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1246d24d4204SJose E. Roman ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1247d24d4204SJose E. Roman ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1248d24d4204SJose E. Roman ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1249d24d4204SJose E. Roman ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1250d24d4204SJose E. Roman ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1251d24d4204SJose E. Roman ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1252d24d4204SJose E. Roman PetscFunctionReturn(0); 1253d24d4204SJose E. Roman } 1254d24d4204SJose E. Roman 1255d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 1256d24d4204SJose E. Roman static struct _MatOps MatOps_Values = { 1257d24d4204SJose E. Roman MatSetValues_ScaLAPACK, 1258d24d4204SJose E. Roman 0, 1259d24d4204SJose E. Roman 0, 1260d24d4204SJose E. Roman MatMult_ScaLAPACK, 1261d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1262d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1263d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1264d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1265d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1266d24d4204SJose E. Roman 0, 1267d24d4204SJose E. Roman /*10*/ 0, 1268d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1269d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1270d24d4204SJose E. Roman 0, 1271d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1272d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1273d24d4204SJose E. Roman 0, 1274d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1275d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1276d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1277d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1278d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1279d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1280d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1281d24d4204SJose E. Roman /*24*/ 0, 1282d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1283d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1284d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1285d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1286d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1287d24d4204SJose E. Roman 0, 1288d24d4204SJose E. Roman 0, 1289d24d4204SJose E. Roman 0, 1290d24d4204SJose E. Roman 0, 1291d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1292d24d4204SJose E. Roman 0, 1293d24d4204SJose E. Roman 0, 1294d24d4204SJose E. Roman 0, 1295d24d4204SJose E. Roman 0, 1296d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1297d24d4204SJose E. Roman 0, 1298d24d4204SJose E. Roman 0, 1299d24d4204SJose E. Roman 0, 1300d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1301d24d4204SJose E. Roman /*44*/ 0, 1302d24d4204SJose E. Roman MatScale_ScaLAPACK, 1303d24d4204SJose E. Roman MatShift_ScaLAPACK, 1304d24d4204SJose E. Roman 0, 1305d24d4204SJose E. Roman 0, 1306d24d4204SJose E. Roman /*49*/ 0, 1307d24d4204SJose E. Roman 0, 1308d24d4204SJose E. Roman 0, 1309d24d4204SJose E. Roman 0, 1310d24d4204SJose E. Roman 0, 1311d24d4204SJose E. Roman /*54*/ 0, 1312d24d4204SJose E. Roman 0, 1313d24d4204SJose E. Roman 0, 1314d24d4204SJose E. Roman 0, 1315d24d4204SJose E. Roman 0, 1316d24d4204SJose E. Roman /*59*/ 0, 1317d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1318d24d4204SJose E. Roman MatView_ScaLAPACK, 1319d24d4204SJose E. Roman 0, 1320d24d4204SJose E. Roman 0, 1321d24d4204SJose E. Roman /*64*/ 0, 1322d24d4204SJose E. Roman 0, 1323d24d4204SJose E. Roman 0, 1324d24d4204SJose E. Roman 0, 1325d24d4204SJose E. Roman 0, 1326d24d4204SJose E. Roman /*69*/ 0, 1327d24d4204SJose E. Roman 0, 1328d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1329d24d4204SJose E. Roman 0, 1330d24d4204SJose E. Roman 0, 1331d24d4204SJose E. Roman /*74*/ 0, 1332d24d4204SJose E. Roman 0, 1333d24d4204SJose E. Roman 0, 1334d24d4204SJose E. Roman 0, 1335d24d4204SJose E. Roman 0, 1336d24d4204SJose E. Roman /*79*/ 0, 1337d24d4204SJose E. Roman 0, 1338d24d4204SJose E. Roman 0, 1339d24d4204SJose E. Roman 0, 1340d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1341d24d4204SJose E. Roman /*84*/ 0, 1342d24d4204SJose E. Roman 0, 1343d24d4204SJose E. Roman 0, 1344d24d4204SJose E. Roman 0, 1345d24d4204SJose E. Roman 0, 1346d24d4204SJose E. Roman /*89*/ 0, 1347d24d4204SJose E. Roman 0, 1348d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1349d24d4204SJose E. Roman 0, 1350d24d4204SJose E. Roman 0, 1351d24d4204SJose E. Roman /*94*/ 0, 1352d24d4204SJose E. Roman 0, 1353d24d4204SJose E. Roman 0, 1354d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1355d24d4204SJose E. Roman 0, 1356d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1357d24d4204SJose E. Roman 0, 1358d24d4204SJose E. Roman 0, 1359d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1360d24d4204SJose E. Roman 0, 1361d24d4204SJose E. Roman /*104*/0, 1362d24d4204SJose E. Roman 0, 1363d24d4204SJose E. Roman 0, 1364d24d4204SJose E. Roman 0, 1365d24d4204SJose E. Roman 0, 1366d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK, 1367d24d4204SJose E. Roman 0, 1368d24d4204SJose E. Roman 0, 1369d24d4204SJose E. Roman 0, 1370d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1371d24d4204SJose E. Roman /*114*/0, 1372d24d4204SJose E. Roman 0, 1373d24d4204SJose E. Roman 0, 1374d24d4204SJose E. Roman 0, 1375d24d4204SJose E. Roman 0, 1376d24d4204SJose E. Roman /*119*/0, 1377d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1378d24d4204SJose E. Roman 0, 1379d24d4204SJose E. Roman 0, 1380d24d4204SJose E. Roman 0, 1381d24d4204SJose E. Roman /*124*/0, 1382d24d4204SJose E. Roman 0, 1383d24d4204SJose E. Roman 0, 1384d24d4204SJose E. Roman 0, 1385d24d4204SJose E. Roman 0, 1386d24d4204SJose E. Roman /*129*/0, 1387d24d4204SJose E. Roman 0, 1388d24d4204SJose E. Roman 0, 1389d24d4204SJose E. Roman 0, 1390d24d4204SJose E. Roman 0, 1391d24d4204SJose E. Roman /*134*/0, 1392d24d4204SJose E. Roman 0, 1393d24d4204SJose E. Roman 0, 1394d24d4204SJose E. Roman 0, 1395d24d4204SJose E. Roman 0, 1396d24d4204SJose E. Roman 0, 1397d24d4204SJose E. Roman /*140*/0, 1398d24d4204SJose E. Roman 0, 1399d24d4204SJose E. Roman 0, 1400d24d4204SJose E. Roman 0, 1401d24d4204SJose E. Roman 0, 1402d24d4204SJose E. Roman /*145*/0, 1403d24d4204SJose E. Roman 0, 1404d24d4204SJose E. Roman 0 1405d24d4204SJose E. Roman }; 1406d24d4204SJose E. Roman 1407d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1408d24d4204SJose E. Roman { 1409d24d4204SJose E. Roman PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1410d24d4204SJose E. Roman PetscInt size=stash->size,nsends; 1411d24d4204SJose E. Roman PetscErrorCode ierr; 1412d24d4204SJose E. Roman PetscInt count,*sindices,**rindices,i,j,l; 1413d24d4204SJose E. Roman PetscScalar **rvalues,*svalues; 1414d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1415d24d4204SJose E. Roman MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1416d24d4204SJose E. Roman PetscMPIInt *sizes,*nlengths,nreceives; 1417d24d4204SJose E. Roman PetscInt *sp_idx,*sp_idy; 1418d24d4204SJose E. Roman PetscScalar *sp_val; 1419d24d4204SJose E. Roman PetscMatStashSpace space,space_next; 1420d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1421d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1422d24d4204SJose E. Roman 1423d24d4204SJose E. Roman PetscFunctionBegin; 1424d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1425d24d4204SJose E. Roman InsertMode addv; 1426d24d4204SJose E. Roman ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1427d24d4204SJose E. Roman if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1428d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1429d24d4204SJose E. Roman } 1430d24d4204SJose E. Roman 1431d24d4204SJose E. Roman bs2 = stash->bs*stash->bs; 1432d24d4204SJose E. Roman 1433d24d4204SJose E. Roman /* first count number of contributors to each processor */ 1434d24d4204SJose E. Roman ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1435d24d4204SJose E. Roman ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1436d24d4204SJose E. Roman 1437d24d4204SJose E. Roman i = j = 0; 1438d24d4204SJose E. Roman space = stash->space_head; 1439d24d4204SJose E. Roman while (space) { 1440d24d4204SJose E. Roman space_next = space->next; 1441d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1442d24d4204SJose E. Roman ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1443d24d4204SJose E. Roman ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1444d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1445d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1446d24d4204SJose E. Roman nlengths[j]++; owner[i] = j; 1447d24d4204SJose E. Roman i++; 1448d24d4204SJose E. Roman } 1449d24d4204SJose E. Roman space = space_next; 1450d24d4204SJose E. Roman } 1451d24d4204SJose E. Roman 1452d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 1453d24d4204SJose E. Roman ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1454d24d4204SJose E. Roman for (i=0, nsends=0; i<size; i++) { 1455d24d4204SJose E. Roman if (nlengths[i]) { 1456d24d4204SJose E. Roman sizes[i] = 1; nsends++; 1457d24d4204SJose E. Roman } 1458d24d4204SJose E. Roman } 1459d24d4204SJose E. Roman 1460d24d4204SJose E. Roman {PetscMPIInt *onodes,*olengths; 1461d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 1462d24d4204SJose E. Roman ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1463d24d4204SJose E. Roman ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1464d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1465d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] *=2; 1466d24d4204SJose E. Roman ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1467d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1468d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1469d24d4204SJose E. Roman ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1470d24d4204SJose E. Roman ierr = PetscFree(onodes);CHKERRQ(ierr); 1471d24d4204SJose E. Roman ierr = PetscFree(olengths);CHKERRQ(ierr);} 1472d24d4204SJose E. Roman 1473d24d4204SJose E. Roman /* do sends: 1474d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1475d24d4204SJose E. Roman the ith processor 1476d24d4204SJose E. Roman */ 1477d24d4204SJose E. Roman ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1478d24d4204SJose E. Roman ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1479d24d4204SJose E. Roman ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1480d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 1481d24d4204SJose E. Roman startv[0] = 0; starti[0] = 0; 1482d24d4204SJose E. Roman for (i=1; i<size; i++) { 1483d24d4204SJose E. Roman startv[i] = startv[i-1] + nlengths[i-1]; 1484d24d4204SJose E. Roman starti[i] = starti[i-1] + 2*nlengths[i-1]; 1485d24d4204SJose E. Roman } 1486d24d4204SJose E. Roman 1487d24d4204SJose E. Roman i = 0; 1488d24d4204SJose E. Roman space = stash->space_head; 1489d24d4204SJose E. Roman while (space) { 1490d24d4204SJose E. Roman space_next = space->next; 1491d24d4204SJose E. Roman sp_idx = space->idx; 1492d24d4204SJose E. Roman sp_idy = space->idy; 1493d24d4204SJose E. Roman sp_val = space->val; 1494d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1495d24d4204SJose E. Roman j = owner[i]; 1496d24d4204SJose E. Roman if (bs2 == 1) { 1497d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1498d24d4204SJose E. Roman } else { 1499d24d4204SJose E. Roman PetscInt k; 1500d24d4204SJose E. Roman PetscScalar *buf1,*buf2; 1501d24d4204SJose E. Roman buf1 = svalues+bs2*startv[j]; 1502d24d4204SJose E. Roman buf2 = space->val + bs2*l; 1503d24d4204SJose E. Roman for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1504d24d4204SJose E. Roman } 1505d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1506d24d4204SJose E. Roman sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1507d24d4204SJose E. Roman startv[j]++; 1508d24d4204SJose E. Roman starti[j]++; 1509d24d4204SJose E. Roman i++; 1510d24d4204SJose E. Roman } 1511d24d4204SJose E. Roman space = space_next; 1512d24d4204SJose E. Roman } 1513d24d4204SJose E. Roman startv[0] = 0; 1514d24d4204SJose E. Roman for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1515d24d4204SJose E. Roman 1516d24d4204SJose E. Roman for (i=0,count=0; i<size; i++) { 1517d24d4204SJose E. Roman if (sizes[i]) { 1518d24d4204SJose E. Roman ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 1519d24d4204SJose E. Roman ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 1520d24d4204SJose E. Roman } 1521d24d4204SJose E. Roman } 1522d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 1523d24d4204SJose E. Roman ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1524d24d4204SJose E. Roman for (i=0; i<size; i++) { 1525d24d4204SJose E. Roman if (sizes[i]) { 1526d24d4204SJose E. Roman ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1527d24d4204SJose E. Roman } 1528d24d4204SJose E. Roman } 1529d24d4204SJose E. Roman #endif 1530d24d4204SJose E. Roman ierr = PetscFree(nlengths);CHKERRQ(ierr); 1531d24d4204SJose E. Roman ierr = PetscFree(owner);CHKERRQ(ierr); 1532d24d4204SJose E. Roman ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1533d24d4204SJose E. Roman ierr = PetscFree(sizes);CHKERRQ(ierr); 1534d24d4204SJose E. Roman 1535d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1536d24d4204SJose E. Roman ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1537d24d4204SJose E. Roman 1538d24d4204SJose E. Roman for (i=0; i<nreceives; i++) { 1539d24d4204SJose E. Roman recv_waits[2*i] = recv_waits1[i]; 1540d24d4204SJose E. Roman recv_waits[2*i+1] = recv_waits2[i]; 1541d24d4204SJose E. Roman } 1542d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1543d24d4204SJose E. Roman 1544d24d4204SJose E. Roman ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1545d24d4204SJose E. Roman ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1546d24d4204SJose E. Roman 1547d24d4204SJose E. Roman stash->svalues = svalues; 1548d24d4204SJose E. Roman stash->sindices = sindices; 1549d24d4204SJose E. Roman stash->rvalues = rvalues; 1550d24d4204SJose E. Roman stash->rindices = rindices; 1551d24d4204SJose E. Roman stash->send_waits = send_waits; 1552d24d4204SJose E. Roman stash->nsends = nsends; 1553d24d4204SJose E. Roman stash->nrecvs = nreceives; 1554d24d4204SJose E. Roman stash->reproduce_count = 0; 1555d24d4204SJose E. Roman PetscFunctionReturn(0); 1556d24d4204SJose E. Roman } 1557d24d4204SJose E. Roman 1558d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1559d24d4204SJose E. Roman { 1560d24d4204SJose E. Roman PetscErrorCode ierr; 1561d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1562d24d4204SJose E. Roman 1563d24d4204SJose E. Roman PetscFunctionBegin; 1564d24d4204SJose E. Roman if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1565d24d4204SJose E. Roman if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1566d24d4204SJose E. Roman if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1567d24d4204SJose E. Roman ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1568d24d4204SJose E. Roman ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1569d24d4204SJose E. Roman PetscFunctionReturn(0); 1570d24d4204SJose E. Roman } 1571d24d4204SJose E. Roman 1572d24d4204SJose E. Roman /*@ 1573d24d4204SJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1574d24d4204SJose E. Roman the ScaLAPACK matrix 1575d24d4204SJose E. Roman 1576d24d4204SJose E. Roman Logically Collective on A 1577d24d4204SJose E. Roman 1578d24d4204SJose E. Roman Input Parameter: 1579d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1580d24d4204SJose E. Roman . mb - the row block size 1581d24d4204SJose E. Roman - nb - the column block size 1582d24d4204SJose E. Roman 1583d24d4204SJose E. Roman Level: intermediate 1584d24d4204SJose E. Roman 1585d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1586d24d4204SJose E. Roman @*/ 1587d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1588d24d4204SJose E. Roman { 1589d24d4204SJose E. Roman PetscErrorCode ierr; 1590d24d4204SJose E. Roman 1591d24d4204SJose E. Roman PetscFunctionBegin; 1592d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1593d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,mb,2); 1594d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,nb,3); 1595d24d4204SJose E. Roman ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1596d24d4204SJose E. Roman PetscFunctionReturn(0); 1597d24d4204SJose E. Roman } 1598d24d4204SJose E. Roman 1599d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1600d24d4204SJose E. Roman { 1601d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1602d24d4204SJose E. Roman 1603d24d4204SJose E. Roman PetscFunctionBegin; 1604d24d4204SJose E. Roman if (mb) *mb = a->mb; 1605d24d4204SJose E. Roman if (nb) *nb = a->nb; 1606d24d4204SJose E. Roman PetscFunctionReturn(0); 1607d24d4204SJose E. Roman } 1608d24d4204SJose E. Roman 1609d24d4204SJose E. Roman /*@ 1610d24d4204SJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1611d24d4204SJose E. Roman the ScaLAPACK matrix 1612d24d4204SJose E. Roman 1613d24d4204SJose E. Roman Not collective 1614d24d4204SJose E. Roman 1615d24d4204SJose E. Roman Input Parameter: 1616d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1617d24d4204SJose E. Roman 1618d24d4204SJose E. Roman Output Parameters: 1619d24d4204SJose E. Roman + mb - the row block size 1620d24d4204SJose E. Roman - nb - the column block size 1621d24d4204SJose E. Roman 1622d24d4204SJose E. Roman Level: intermediate 1623d24d4204SJose E. Roman 1624d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1625d24d4204SJose E. Roman @*/ 1626d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1627d24d4204SJose E. Roman { 1628d24d4204SJose E. Roman PetscErrorCode ierr; 1629d24d4204SJose E. Roman 1630d24d4204SJose E. Roman PetscFunctionBegin; 1631d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1632d24d4204SJose E. Roman ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1633d24d4204SJose E. Roman PetscFunctionReturn(0); 1634d24d4204SJose E. Roman } 1635d24d4204SJose E. Roman 1636d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1637d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1638d24d4204SJose E. Roman 1639d24d4204SJose E. Roman /*MC 1640d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1641d24d4204SJose E. Roman 1642d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1643d24d4204SJose E. Roman 1644d24d4204SJose E. Roman Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1645d24d4204SJose E. Roman 1646d24d4204SJose E. Roman Options Database Keys: 1647d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1648d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1649d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1650d24d4204SJose E. Roman 1651d24d4204SJose E. Roman Level: beginner 1652d24d4204SJose E. Roman 1653d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL 1654d24d4204SJose E. Roman M*/ 1655d24d4204SJose E. Roman 1656d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1657d24d4204SJose E. Roman { 1658d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1659d24d4204SJose E. Roman PetscErrorCode ierr; 1660d24d4204SJose E. Roman PetscBool flg,flg1; 1661d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1662d24d4204SJose E. Roman MPI_Comm icomm; 1663d24d4204SJose E. Roman PetscBLASInt nprow,npcol,myrow,mycol; 1664d24d4204SJose E. Roman PetscInt optv1,k=2,array[2]={0,0}; 1665d24d4204SJose E. Roman PetscMPIInt size; 1666d24d4204SJose E. Roman 1667d24d4204SJose E. Roman PetscFunctionBegin; 1668d24d4204SJose E. Roman ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1669d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1670d24d4204SJose E. Roman 1671d24d4204SJose E. Roman ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1672d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1673d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1674d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1675d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1676d24d4204SJose E. Roman 1677d24d4204SJose E. Roman ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1678d24d4204SJose E. Roman A->data = (void*)a; 1679d24d4204SJose E. Roman 1680d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1681d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1682d24d4204SJose E. Roman ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr); 1683d24d4204SJose E. Roman } 1684d24d4204SJose E. Roman ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1685d24d4204SJose E. Roman ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1686d24d4204SJose E. Roman if (!flg) { 1687d24d4204SJose E. Roman ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1688d24d4204SJose E. Roman 1689d24d4204SJose E. Roman ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr); 1690d24d4204SJose E. Roman grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1691d24d4204SJose E. Roman 1692d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1693d24d4204SJose E. Roman ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1694d24d4204SJose E. Roman if (flg1) { 1695d24d4204SJose E. Roman if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1696d24d4204SJose E. Roman grid->nprow = optv1; 1697d24d4204SJose E. Roman } 1698d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1699d24d4204SJose E. Roman 1700d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1701d24d4204SJose E. Roman grid->npcol = size/grid->nprow; 1702d24d4204SJose E. Roman ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1703d24d4204SJose E. Roman ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1704d24d4204SJose E. Roman Cblacs_get(-1,0,&grid->ictxt); 1705d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1706d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1707d24d4204SJose E. Roman grid->grid_refct = 1; 1708d24d4204SJose E. Roman grid->nprow = nprow; 1709d24d4204SJose E. Roman grid->npcol = npcol; 1710d24d4204SJose E. Roman grid->myrow = myrow; 1711d24d4204SJose E. Roman grid->mycol = mycol; 1712d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1713d24d4204SJose E. Roman Cblacs_get(-1,0,&grid->ictxrow); 1714d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1715d24d4204SJose E. Roman Cblacs_get(-1,0,&grid->ictxcol); 1716d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1717d24d4204SJose E. Roman ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr); 1718d24d4204SJose E. Roman 1719d24d4204SJose E. Roman } else grid->grid_refct++; 1720d24d4204SJose E. Roman ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1721d24d4204SJose E. Roman a->grid = grid; 1722d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1723d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1724d24d4204SJose E. Roman 1725d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1726d24d4204SJose E. Roman ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1727d24d4204SJose E. Roman if (flg) { 1728d24d4204SJose E. Roman a->mb = array[0]; 1729d24d4204SJose E. Roman a->nb = (k>1)? array[1]: a->mb; 1730d24d4204SJose E. Roman } 1731d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1732d24d4204SJose E. Roman 1733d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1734d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1735d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1736d24d4204SJose E. Roman ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1737d24d4204SJose E. Roman PetscFunctionReturn(0); 1738d24d4204SJose E. Roman } 1739d24d4204SJose E. Roman 1740d24d4204SJose E. Roman /*@C 1741d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1742d24d4204SJose E. Roman (2D block cyclic distribution). 1743d24d4204SJose E. Roman 1744d24d4204SJose E. Roman Collective 1745d24d4204SJose E. Roman 1746d24d4204SJose E. Roman Input Parameters: 1747d24d4204SJose E. Roman + comm - MPI communicator 1748d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1749d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1750d24d4204SJose E. Roman . M - number of global rows 1751d24d4204SJose E. Roman . N - number of global columns 1752d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1753d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1754d24d4204SJose E. Roman 1755d24d4204SJose E. Roman Output Parameter: 1756d24d4204SJose E. Roman . A - the matrix 1757d24d4204SJose E. Roman 1758d24d4204SJose E. Roman Options Database Keys: 1759d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1760d24d4204SJose E. Roman 1761d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1762d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1763d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1764d24d4204SJose E. Roman 1765d24d4204SJose E. Roman Notes: 1766d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1767d24d4204SJose E. Roman is chosen. 1768d24d4204SJose E. Roman 1769d24d4204SJose E. Roman Storage Information: 1770d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1771d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1772d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1773d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1774d24d4204SJose E. Roman 1775d24d4204SJose E. Roman Level: intermediate 1776d24d4204SJose E. Roman 1777d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1778d24d4204SJose E. Roman @*/ 1779d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1780d24d4204SJose E. Roman { 1781d24d4204SJose E. Roman PetscErrorCode ierr; 1782d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1783d24d4204SJose E. Roman PetscInt m,n; 1784d24d4204SJose E. Roman 1785d24d4204SJose E. Roman PetscFunctionBegin; 1786d24d4204SJose E. Roman ierr = MatCreate(comm,A);CHKERRQ(ierr); 1787d24d4204SJose E. Roman ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1788d24d4204SJose E. Roman if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1789d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1790d24d4204SJose E. Roman m = PETSC_DECIDE; 1791d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1792d24d4204SJose E. Roman n = PETSC_DECIDE; 1793d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1794d24d4204SJose E. Roman ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1795d24d4204SJose E. Roman a = (Mat_ScaLAPACK*)(*A)->data; 1796d24d4204SJose E. Roman ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1797d24d4204SJose E. Roman ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1798d24d4204SJose E. Roman ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1799d24d4204SJose E. Roman ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1800d24d4204SJose E. Roman ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1801d24d4204SJose E. Roman ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1802d24d4204SJose E. Roman ierr = MatSetUp(*A);CHKERRQ(ierr); 1803d24d4204SJose E. Roman PetscFunctionReturn(0); 1804d24d4204SJose E. Roman } 1805d24d4204SJose E. Roman 1806