1*d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2*d24d4204SJose E. Roman 3*d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64 4*d24d4204SJose E. Roman 5*d24d4204SJose E. Roman /* 6*d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 7*d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 8*d24d4204SJose E. Roman */ 9*d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 10*d24d4204SJose E. Roman 11*d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 12*d24d4204SJose E. Roman { 13*d24d4204SJose E. Roman PetscErrorCode ierr; 14*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 15*d24d4204SJose E. Roman PetscBool iascii; 16*d24d4204SJose E. Roman PetscViewerFormat format; 17*d24d4204SJose E. Roman Mat Adense; 18*d24d4204SJose E. Roman 19*d24d4204SJose E. Roman PetscFunctionBegin; 20*d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 21*d24d4204SJose E. Roman if (iascii) { 22*d24d4204SJose E. Roman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 23*d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 24*d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 25*d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 26*d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 27*d24d4204SJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 28*d24d4204SJose E. Roman PetscFunctionReturn(0); 29*d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 30*d24d4204SJose E. Roman PetscFunctionReturn(0); 31*d24d4204SJose E. Roman } 32*d24d4204SJose E. Roman } 33*d24d4204SJose E. Roman /* convert to dense format and call MatView() */ 34*d24d4204SJose E. Roman ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 35*d24d4204SJose E. Roman ierr = MatView(Adense,viewer);CHKERRQ(ierr); 36*d24d4204SJose E. Roman ierr = MatDestroy(&Adense);CHKERRQ(ierr); 37*d24d4204SJose E. Roman PetscFunctionReturn(0); 38*d24d4204SJose E. Roman } 39*d24d4204SJose E. Roman 40*d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 41*d24d4204SJose E. Roman { 42*d24d4204SJose E. Roman PetscErrorCode ierr; 43*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 44*d24d4204SJose E. Roman PetscLogDouble isend[2],irecv[2]; 45*d24d4204SJose E. Roman 46*d24d4204SJose E. Roman PetscFunctionBegin; 47*d24d4204SJose E. Roman info->block_size = 1.0; 48*d24d4204SJose E. Roman 49*d24d4204SJose E. Roman isend[0] = a->lld*a->locc; /* locally allocated */ 50*d24d4204SJose E. Roman isend[1] = a->locr*a->locc; /* used submatrix */ 51*d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 52*d24d4204SJose E. Roman info->nz_allocated = isend[0]; 53*d24d4204SJose E. Roman info->nz_used = isend[1]; 54*d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) { 55*d24d4204SJose E. Roman ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 56*d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 57*d24d4204SJose E. Roman info->nz_used = irecv[1]; 58*d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) { 59*d24d4204SJose E. Roman ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 60*d24d4204SJose E. Roman info->nz_allocated = irecv[0]; 61*d24d4204SJose E. Roman info->nz_used = irecv[1]; 62*d24d4204SJose E. Roman } 63*d24d4204SJose E. Roman 64*d24d4204SJose E. Roman info->nz_unneeded = 0; 65*d24d4204SJose E. Roman info->assemblies = A->num_ass; 66*d24d4204SJose E. Roman info->mallocs = 0; 67*d24d4204SJose E. Roman info->memory = ((PetscObject)A)->mem; 68*d24d4204SJose E. Roman info->fill_ratio_given = 0; 69*d24d4204SJose E. Roman info->fill_ratio_needed = 0; 70*d24d4204SJose E. Roman info->factor_mallocs = 0; 71*d24d4204SJose E. Roman PetscFunctionReturn(0); 72*d24d4204SJose E. Roman } 73*d24d4204SJose E. Roman 74*d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 75*d24d4204SJose E. Roman { 76*d24d4204SJose E. Roman PetscFunctionBegin; 77*d24d4204SJose E. Roman switch (op) { 78*d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS: 79*d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR: 80*d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR: 81*d24d4204SJose E. Roman case MAT_SYMMETRIC: 82*d24d4204SJose E. Roman case MAT_SORTED_FULL: 83*d24d4204SJose E. Roman case MAT_HERMITIAN: 84*d24d4204SJose E. Roman break; 85*d24d4204SJose E. Roman default: 86*d24d4204SJose E. Roman SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 87*d24d4204SJose E. Roman } 88*d24d4204SJose E. Roman PetscFunctionReturn(0); 89*d24d4204SJose E. Roman } 90*d24d4204SJose E. Roman 91*d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 92*d24d4204SJose E. Roman { 93*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 94*d24d4204SJose E. Roman PetscErrorCode ierr; 95*d24d4204SJose E. Roman PetscInt i,j; 96*d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 97*d24d4204SJose E. Roman 98*d24d4204SJose E. Roman PetscFunctionBegin; 99*d24d4204SJose E. Roman for (i=0;i<nr;i++) { 100*d24d4204SJose E. Roman if (rows[i] < 0) continue; 101*d24d4204SJose E. Roman ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 102*d24d4204SJose E. Roman for (j=0;j<nc;j++) { 103*d24d4204SJose E. Roman if (cols[j] < 0) continue; 104*d24d4204SJose E. Roman ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 105*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 106*d24d4204SJose E. Roman if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 107*d24d4204SJose E. Roman switch (imode) { 108*d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 109*d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 110*d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 111*d24d4204SJose E. Roman } 112*d24d4204SJose E. Roman } else { 113*d24d4204SJose E. Roman if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 114*d24d4204SJose E. Roman A->assembled = PETSC_FALSE; 115*d24d4204SJose E. Roman ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 116*d24d4204SJose E. Roman } 117*d24d4204SJose E. Roman } 118*d24d4204SJose E. Roman } 119*d24d4204SJose E. Roman PetscFunctionReturn(0); 120*d24d4204SJose E. Roman } 121*d24d4204SJose E. Roman 122*d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 123*d24d4204SJose E. Roman { 124*d24d4204SJose E. Roman PetscErrorCode ierr; 125*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 126*d24d4204SJose E. Roman PetscScalar *x2d,*y2d,alpha=1.0; 127*d24d4204SJose E. Roman const PetscInt *ranges; 128*d24d4204SJose E. Roman PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 129*d24d4204SJose E. Roman 130*d24d4204SJose E. Roman PetscFunctionBegin; 131*d24d4204SJose E. Roman if (transpose) { 132*d24d4204SJose E. Roman 133*d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 134*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 135*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 136*d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 137*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 138*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 139*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 140*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 141*d24d4204SJose E. Roman ylld = 1; 142*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 143*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 144*d24d4204SJose E. Roman 145*d24d4204SJose E. Roman /* allocate 2d vectors */ 146*d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 147*d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 148*d24d4204SJose E. Roman ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 149*d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 150*d24d4204SJose E. Roman 151*d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 152*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 153*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 154*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 155*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 156*d24d4204SJose E. Roman 157*d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 158*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 159*d24d4204SJose E. Roman 160*d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */ 161*d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 162*d24d4204SJose E. Roman 163*d24d4204SJose E. Roman /* call PBLAS subroutine */ 164*d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 165*d24d4204SJose E. Roman 166*d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */ 167*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 168*d24d4204SJose E. Roman 169*d24d4204SJose E. Roman } else { /* non-transpose */ 170*d24d4204SJose E. Roman 171*d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 172*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 173*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 174*d24d4204SJose E. Roman xlld = 1; 175*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 176*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 177*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 178*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 179*d24d4204SJose E. Roman ylld = PetscMax(1,A->rmap->n); 180*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 181*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 182*d24d4204SJose E. Roman 183*d24d4204SJose E. Roman /* allocate 2d vectors */ 184*d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 185*d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 186*d24d4204SJose E. Roman ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 187*d24d4204SJose E. Roman ylld = PetscMax(1,lszy); 188*d24d4204SJose E. Roman 189*d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 190*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 191*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 192*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 193*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 194*d24d4204SJose E. Roman 195*d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */ 196*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 197*d24d4204SJose E. Roman 198*d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */ 199*d24d4204SJose E. Roman if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 200*d24d4204SJose E. Roman 201*d24d4204SJose E. Roman /* call PBLAS subroutine */ 202*d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 203*d24d4204SJose E. Roman 204*d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */ 205*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 206*d24d4204SJose E. Roman 207*d24d4204SJose E. Roman } 208*d24d4204SJose E. Roman ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 209*d24d4204SJose E. Roman PetscFunctionReturn(0); 210*d24d4204SJose E. Roman } 211*d24d4204SJose E. Roman 212*d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 213*d24d4204SJose E. Roman { 214*d24d4204SJose E. Roman PetscErrorCode ierr; 215*d24d4204SJose E. Roman const PetscScalar *xarray; 216*d24d4204SJose E. Roman PetscScalar *yarray; 217*d24d4204SJose E. Roman 218*d24d4204SJose E. Roman PetscFunctionBegin; 219*d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 220*d24d4204SJose E. Roman ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 221*d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 222*d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 223*d24d4204SJose E. Roman ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 224*d24d4204SJose E. Roman PetscFunctionReturn(0); 225*d24d4204SJose E. Roman } 226*d24d4204SJose E. Roman 227*d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 228*d24d4204SJose E. Roman { 229*d24d4204SJose E. Roman PetscErrorCode ierr; 230*d24d4204SJose E. Roman const PetscScalar *xarray; 231*d24d4204SJose E. Roman PetscScalar *yarray; 232*d24d4204SJose E. Roman 233*d24d4204SJose E. Roman PetscFunctionBegin; 234*d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 235*d24d4204SJose E. Roman ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 236*d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 237*d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 238*d24d4204SJose E. Roman ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 239*d24d4204SJose E. Roman PetscFunctionReturn(0); 240*d24d4204SJose E. Roman } 241*d24d4204SJose E. Roman 242*d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 243*d24d4204SJose E. Roman { 244*d24d4204SJose E. Roman PetscErrorCode ierr; 245*d24d4204SJose E. Roman const PetscScalar *xarray; 246*d24d4204SJose E. Roman PetscScalar *zarray; 247*d24d4204SJose E. Roman 248*d24d4204SJose E. Roman PetscFunctionBegin; 249*d24d4204SJose E. Roman if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 250*d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 251*d24d4204SJose E. Roman ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 252*d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 253*d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 254*d24d4204SJose E. Roman ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 255*d24d4204SJose E. Roman PetscFunctionReturn(0); 256*d24d4204SJose E. Roman } 257*d24d4204SJose E. Roman 258*d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 259*d24d4204SJose E. Roman { 260*d24d4204SJose E. Roman PetscErrorCode ierr; 261*d24d4204SJose E. Roman const PetscScalar *xarray; 262*d24d4204SJose E. Roman PetscScalar *zarray; 263*d24d4204SJose E. Roman 264*d24d4204SJose E. Roman PetscFunctionBegin; 265*d24d4204SJose E. Roman if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 266*d24d4204SJose E. Roman ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 267*d24d4204SJose E. Roman ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 268*d24d4204SJose E. Roman ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 269*d24d4204SJose E. Roman ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 270*d24d4204SJose E. Roman ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 271*d24d4204SJose E. Roman PetscFunctionReturn(0); 272*d24d4204SJose E. Roman } 273*d24d4204SJose E. Roman 274*d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 275*d24d4204SJose E. Roman { 276*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 277*d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 278*d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 279*d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 280*d24d4204SJose E. Roman PetscBLASInt one=1; 281*d24d4204SJose E. Roman 282*d24d4204SJose E. Roman PetscFunctionBegin; 283*d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 284*d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 285*d24d4204SJose E. Roman PetscFunctionReturn(0); 286*d24d4204SJose E. Roman } 287*d24d4204SJose E. Roman 288*d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 289*d24d4204SJose E. Roman { 290*d24d4204SJose E. Roman PetscErrorCode ierr; 291*d24d4204SJose E. Roman 292*d24d4204SJose E. Roman PetscFunctionBegin; 293*d24d4204SJose E. Roman ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 294*d24d4204SJose E. Roman ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 295*d24d4204SJose E. Roman ierr = MatSetUp(C);CHKERRQ(ierr); 296*d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 297*d24d4204SJose E. Roman PetscFunctionReturn(0); 298*d24d4204SJose E. Roman } 299*d24d4204SJose E. Roman 300*d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 301*d24d4204SJose E. Roman { 302*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 303*d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 304*d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 305*d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 306*d24d4204SJose E. Roman PetscBLASInt one=1; 307*d24d4204SJose E. Roman 308*d24d4204SJose E. Roman PetscFunctionBegin; 309*d24d4204SJose E. Roman PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 310*d24d4204SJose E. Roman C->assembled = PETSC_TRUE; 311*d24d4204SJose E. Roman PetscFunctionReturn(0); 312*d24d4204SJose E. Roman } 313*d24d4204SJose E. Roman 314*d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 315*d24d4204SJose E. Roman { 316*d24d4204SJose E. Roman PetscErrorCode ierr; 317*d24d4204SJose E. Roman 318*d24d4204SJose E. Roman PetscFunctionBegin; 319*d24d4204SJose E. Roman ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 320*d24d4204SJose E. Roman ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 321*d24d4204SJose E. Roman ierr = MatSetUp(C);CHKERRQ(ierr); 322*d24d4204SJose E. Roman PetscFunctionReturn(0); 323*d24d4204SJose E. Roman } 324*d24d4204SJose E. Roman 325*d24d4204SJose E. Roman /* --------------------------------------- */ 326*d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 327*d24d4204SJose E. Roman { 328*d24d4204SJose E. Roman PetscFunctionBegin; 329*d24d4204SJose E. Roman C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 330*d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_AB; 331*d24d4204SJose E. Roman PetscFunctionReturn(0); 332*d24d4204SJose E. Roman } 333*d24d4204SJose E. Roman 334*d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 335*d24d4204SJose E. Roman { 336*d24d4204SJose E. Roman PetscFunctionBegin; 337*d24d4204SJose E. Roman C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 338*d24d4204SJose E. Roman C->ops->productsymbolic = MatProductSymbolic_ABt; 339*d24d4204SJose E. Roman PetscFunctionReturn(0); 340*d24d4204SJose E. Roman } 341*d24d4204SJose E. Roman 342*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 343*d24d4204SJose E. Roman { 344*d24d4204SJose E. Roman PetscErrorCode ierr; 345*d24d4204SJose E. Roman Mat_Product *product = C->product; 346*d24d4204SJose E. Roman 347*d24d4204SJose E. Roman PetscFunctionBegin; 348*d24d4204SJose E. Roman switch (product->type) { 349*d24d4204SJose E. Roman case MATPRODUCT_AB: 350*d24d4204SJose E. Roman ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 351*d24d4204SJose E. Roman break; 352*d24d4204SJose E. Roman case MATPRODUCT_ABt: 353*d24d4204SJose E. Roman ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 354*d24d4204SJose E. Roman break; 355*d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 356*d24d4204SJose E. Roman } 357*d24d4204SJose E. Roman PetscFunctionReturn(0); 358*d24d4204SJose E. Roman } 359*d24d4204SJose E. Roman /* --------------------------------------- */ 360*d24d4204SJose E. Roman 361*d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 362*d24d4204SJose E. Roman { 363*d24d4204SJose E. Roman PetscErrorCode ierr; 364*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 365*d24d4204SJose E. Roman PetscScalar *darray,*d2d,v; 366*d24d4204SJose E. Roman const PetscInt *ranges; 367*d24d4204SJose E. Roman PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 368*d24d4204SJose E. Roman 369*d24d4204SJose E. Roman PetscFunctionBegin; 370*d24d4204SJose E. Roman ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 371*d24d4204SJose E. Roman 372*d24d4204SJose E. Roman if (A->rmap->N<=A->cmap->N) { /* row version */ 373*d24d4204SJose E. Roman 374*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 375*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 376*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 377*d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 378*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 379*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 380*d24d4204SJose E. Roman 381*d24d4204SJose E. Roman /* allocate 2d vector */ 382*d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 383*d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 384*d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 385*d24d4204SJose E. Roman 386*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 387*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 388*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 389*d24d4204SJose E. Roman 390*d24d4204SJose E. Roman /* collect diagonal */ 391*d24d4204SJose E. Roman for (j=1;j<=a->M;j++) { 392*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 393*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 394*d24d4204SJose E. Roman } 395*d24d4204SJose E. Roman 396*d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */ 397*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 398*d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 399*d24d4204SJose E. Roman 400*d24d4204SJose E. Roman } else { /* column version */ 401*d24d4204SJose E. Roman 402*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 403*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 404*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 405*d24d4204SJose E. Roman dlld = 1; 406*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 407*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 408*d24d4204SJose E. Roman 409*d24d4204SJose E. Roman /* allocate 2d vector */ 410*d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 411*d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 412*d24d4204SJose E. Roman 413*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 414*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 415*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 416*d24d4204SJose E. Roman 417*d24d4204SJose E. Roman /* collect diagonal */ 418*d24d4204SJose E. Roman for (j=1;j<=a->N;j++) { 419*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 420*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 421*d24d4204SJose E. Roman } 422*d24d4204SJose E. Roman 423*d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */ 424*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 425*d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 426*d24d4204SJose E. Roman } 427*d24d4204SJose E. Roman 428*d24d4204SJose E. Roman ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 429*d24d4204SJose E. Roman ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 430*d24d4204SJose E. Roman ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 431*d24d4204SJose E. Roman PetscFunctionReturn(0); 432*d24d4204SJose E. Roman } 433*d24d4204SJose E. Roman 434*d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 435*d24d4204SJose E. Roman { 436*d24d4204SJose E. Roman PetscErrorCode ierr; 437*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 438*d24d4204SJose E. Roman const PetscScalar *d; 439*d24d4204SJose E. Roman const PetscInt *ranges; 440*d24d4204SJose E. Roman PetscScalar *d2d; 441*d24d4204SJose E. Roman PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 442*d24d4204SJose E. Roman 443*d24d4204SJose E. Roman PetscFunctionBegin; 444*d24d4204SJose E. Roman if (R) { 445*d24d4204SJose E. Roman ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 446*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 447*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 448*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 449*d24d4204SJose E. Roman dlld = 1; 450*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 451*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 452*d24d4204SJose E. Roman 453*d24d4204SJose E. Roman /* allocate 2d vector */ 454*d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 455*d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 456*d24d4204SJose E. Roman 457*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 458*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 459*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 460*d24d4204SJose E. Roman 461*d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */ 462*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 463*d24d4204SJose E. Roman 464*d24d4204SJose E. Roman /* broadcast along process columns */ 465*d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 466*d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 467*d24d4204SJose E. Roman 468*d24d4204SJose E. Roman /* local scaling */ 469*d24d4204SJose E. Roman for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 470*d24d4204SJose E. Roman 471*d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 472*d24d4204SJose E. Roman ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 473*d24d4204SJose E. Roman } 474*d24d4204SJose E. Roman if (L) { 475*d24d4204SJose E. Roman ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 476*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */ 477*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 478*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 479*d24d4204SJose E. Roman dlld = PetscMax(1,A->rmap->n); 480*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 481*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 482*d24d4204SJose E. Roman 483*d24d4204SJose E. Roman /* allocate 2d vector */ 484*d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 485*d24d4204SJose E. Roman ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 486*d24d4204SJose E. Roman dlld = PetscMax(1,lszd); 487*d24d4204SJose E. Roman 488*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */ 489*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 490*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 491*d24d4204SJose E. Roman 492*d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */ 493*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 494*d24d4204SJose E. Roman 495*d24d4204SJose E. Roman /* broadcast along process rows */ 496*d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 497*d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 498*d24d4204SJose E. Roman 499*d24d4204SJose E. Roman /* local scaling */ 500*d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 501*d24d4204SJose E. Roman 502*d24d4204SJose E. Roman ierr = PetscFree(d2d);CHKERRQ(ierr); 503*d24d4204SJose E. Roman ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 504*d24d4204SJose E. Roman } 505*d24d4204SJose E. Roman PetscFunctionReturn(0); 506*d24d4204SJose E. Roman } 507*d24d4204SJose E. Roman 508*d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 509*d24d4204SJose E. Roman { 510*d24d4204SJose E. Roman PetscFunctionBegin; 511*d24d4204SJose E. Roman *missing = PETSC_FALSE; 512*d24d4204SJose E. Roman PetscFunctionReturn(0); 513*d24d4204SJose E. Roman } 514*d24d4204SJose E. Roman 515*d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 516*d24d4204SJose E. Roman { 517*d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 518*d24d4204SJose E. Roman PetscBLASInt n,one=1; 519*d24d4204SJose E. Roman 520*d24d4204SJose E. Roman PetscFunctionBegin; 521*d24d4204SJose E. Roman n = x->lld*x->locc; 522*d24d4204SJose E. Roman PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 523*d24d4204SJose E. Roman PetscFunctionReturn(0); 524*d24d4204SJose E. Roman } 525*d24d4204SJose E. Roman 526*d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 527*d24d4204SJose E. Roman { 528*d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 529*d24d4204SJose E. Roman PetscBLASInt i,n; 530*d24d4204SJose E. Roman PetscScalar v; 531*d24d4204SJose E. Roman 532*d24d4204SJose E. Roman PetscFunctionBegin; 533*d24d4204SJose E. Roman n = PetscMin(x->M,x->N); 534*d24d4204SJose E. Roman for (i=1;i<=n;i++) { 535*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 536*d24d4204SJose E. Roman v += alpha; 537*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 538*d24d4204SJose E. Roman } 539*d24d4204SJose E. Roman PetscFunctionReturn(0); 540*d24d4204SJose E. Roman } 541*d24d4204SJose E. Roman 542*d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 543*d24d4204SJose E. Roman { 544*d24d4204SJose E. Roman PetscErrorCode ierr; 545*d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 546*d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 547*d24d4204SJose E. Roman PetscBLASInt one=1; 548*d24d4204SJose E. Roman PetscScalar beta=1.0; 549*d24d4204SJose E. Roman 550*d24d4204SJose E. Roman PetscFunctionBegin; 551*d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y,1,X,3); 552*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 553*d24d4204SJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 554*d24d4204SJose E. Roman PetscFunctionReturn(0); 555*d24d4204SJose E. Roman } 556*d24d4204SJose E. Roman 557*d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 558*d24d4204SJose E. Roman { 559*d24d4204SJose E. Roman PetscErrorCode ierr; 560*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 561*d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 562*d24d4204SJose E. Roman 563*d24d4204SJose E. Roman PetscFunctionBegin; 564*d24d4204SJose E. Roman ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 565*d24d4204SJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 566*d24d4204SJose E. Roman PetscFunctionReturn(0); 567*d24d4204SJose E. Roman } 568*d24d4204SJose E. Roman 569*d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 570*d24d4204SJose E. Roman { 571*d24d4204SJose E. Roman Mat Bs; 572*d24d4204SJose E. Roman MPI_Comm comm; 573*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 574*d24d4204SJose E. Roman PetscErrorCode ierr; 575*d24d4204SJose E. Roman 576*d24d4204SJose E. Roman PetscFunctionBegin; 577*d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 578*d24d4204SJose E. Roman ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 579*d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 580*d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 581*d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 582*d24d4204SJose E. Roman b->M = a->M; 583*d24d4204SJose E. Roman b->N = a->N; 584*d24d4204SJose E. Roman b->mb = a->mb; 585*d24d4204SJose E. Roman b->nb = a->nb; 586*d24d4204SJose E. Roman b->rsrc = a->rsrc; 587*d24d4204SJose E. Roman b->csrc = a->csrc; 588*d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 589*d24d4204SJose E. Roman *B = Bs; 590*d24d4204SJose E. Roman if (op == MAT_COPY_VALUES) { 591*d24d4204SJose E. Roman ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 592*d24d4204SJose E. Roman } 593*d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 594*d24d4204SJose E. Roman PetscFunctionReturn(0); 595*d24d4204SJose E. Roman } 596*d24d4204SJose E. Roman 597*d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 598*d24d4204SJose E. Roman { 599*d24d4204SJose E. Roman PetscErrorCode ierr; 600*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601*d24d4204SJose E. Roman Mat Bs = *B; 602*d24d4204SJose E. Roman PetscBLASInt one=1; 603*d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 604*d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 605*d24d4204SJose E. Roman PetscInt i,j; 606*d24d4204SJose E. Roman #endif 607*d24d4204SJose E. Roman 608*d24d4204SJose E. Roman PetscFunctionBegin; 609*d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 610*d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 611*d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 612*d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 613*d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 614*d24d4204SJose E. Roman *B = Bs; 615*d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 616*d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 617*d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 618*d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX) 619*d24d4204SJose E. Roman /* undo conjugation */ 620*d24d4204SJose E. Roman for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 621*d24d4204SJose E. Roman #endif 622*d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 623*d24d4204SJose E. Roman PetscFunctionReturn(0); 624*d24d4204SJose E. Roman } 625*d24d4204SJose E. Roman 626*d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 627*d24d4204SJose E. Roman { 628*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 629*d24d4204SJose E. Roman PetscInt i,j; 630*d24d4204SJose E. Roman 631*d24d4204SJose E. Roman PetscFunctionBegin; 632*d24d4204SJose E. Roman for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 633*d24d4204SJose E. Roman PetscFunctionReturn(0); 634*d24d4204SJose E. Roman } 635*d24d4204SJose E. Roman 636*d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 637*d24d4204SJose E. Roman { 638*d24d4204SJose E. Roman PetscErrorCode ierr; 639*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 640*d24d4204SJose E. Roman Mat Bs = *B; 641*d24d4204SJose E. Roman PetscBLASInt one=1; 642*d24d4204SJose E. Roman PetscScalar sone=1.0,zero=0.0; 643*d24d4204SJose E. Roman 644*d24d4204SJose E. Roman PetscFunctionBegin; 645*d24d4204SJose E. Roman if (reuse == MAT_INITIAL_MATRIX) { 646*d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 647*d24d4204SJose E. Roman ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 648*d24d4204SJose E. Roman ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 649*d24d4204SJose E. Roman ierr = MatSetUp(Bs);CHKERRQ(ierr); 650*d24d4204SJose E. Roman *B = Bs; 651*d24d4204SJose E. Roman } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 652*d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bs->data; 653*d24d4204SJose E. Roman PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 654*d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE; 655*d24d4204SJose E. Roman PetscFunctionReturn(0); 656*d24d4204SJose E. Roman } 657*d24d4204SJose E. Roman 658*d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 659*d24d4204SJose E. Roman { 660*d24d4204SJose E. Roman PetscErrorCode ierr; 661*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 662*d24d4204SJose E. Roman PetscScalar *x,*x2d; 663*d24d4204SJose E. Roman const PetscInt *ranges; 664*d24d4204SJose E. Roman PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 665*d24d4204SJose E. Roman 666*d24d4204SJose E. Roman PetscFunctionBegin; 667*d24d4204SJose E. Roman ierr = VecCopy(B,X);CHKERRQ(ierr); 668*d24d4204SJose E. Roman ierr = VecGetArray(X,&x);CHKERRQ(ierr); 669*d24d4204SJose E. Roman 670*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 671*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 672*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 673*d24d4204SJose E. Roman xlld = PetscMax(1,A->rmap->n); 674*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 675*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 676*d24d4204SJose E. Roman 677*d24d4204SJose E. Roman /* allocate 2d vector */ 678*d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 679*d24d4204SJose E. Roman ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 680*d24d4204SJose E. Roman xlld = PetscMax(1,lszx); 681*d24d4204SJose E. Roman 682*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 683*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 684*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 685*d24d4204SJose E. Roman 686*d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */ 687*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 688*d24d4204SJose E. Roman 689*d24d4204SJose E. Roman /* call ScaLAPACK subroutine */ 690*d24d4204SJose E. Roman switch (A->factortype) { 691*d24d4204SJose E. Roman case MAT_FACTOR_LU: 692*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 693*d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 694*d24d4204SJose E. Roman break; 695*d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 696*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 697*d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 698*d24d4204SJose E. Roman break; 699*d24d4204SJose E. Roman default: 700*d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 701*d24d4204SJose E. Roman break; 702*d24d4204SJose E. Roman } 703*d24d4204SJose E. Roman 704*d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */ 705*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 706*d24d4204SJose E. Roman 707*d24d4204SJose E. Roman ierr = PetscFree(x2d);CHKERRQ(ierr); 708*d24d4204SJose E. Roman ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 709*d24d4204SJose E. Roman PetscFunctionReturn(0); 710*d24d4204SJose E. Roman } 711*d24d4204SJose E. Roman 712*d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 713*d24d4204SJose E. Roman { 714*d24d4204SJose E. Roman PetscErrorCode ierr; 715*d24d4204SJose E. Roman 716*d24d4204SJose E. Roman PetscFunctionBegin; 717*d24d4204SJose E. Roman ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 718*d24d4204SJose E. Roman ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 719*d24d4204SJose E. Roman PetscFunctionReturn(0); 720*d24d4204SJose E. Roman } 721*d24d4204SJose E. Roman 722*d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 723*d24d4204SJose E. Roman { 724*d24d4204SJose E. Roman PetscErrorCode ierr; 725*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 726*d24d4204SJose E. Roman PetscBool flg1,flg2; 727*d24d4204SJose E. Roman PetscBLASInt one=1,info; 728*d24d4204SJose E. Roman 729*d24d4204SJose E. Roman PetscFunctionBegin; 730*d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 731*d24d4204SJose E. Roman ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 732*d24d4204SJose E. Roman if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 733*d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(B,1,X,2); 734*d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)B->data; 735*d24d4204SJose E. Roman x = (Mat_ScaLAPACK*)X->data; 736*d24d4204SJose E. Roman ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 737*d24d4204SJose E. Roman 738*d24d4204SJose E. Roman switch (A->factortype) { 739*d24d4204SJose E. Roman case MAT_FACTOR_LU: 740*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 741*d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs",info); 742*d24d4204SJose E. Roman break; 743*d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY: 744*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 745*d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs",info); 746*d24d4204SJose E. Roman break; 747*d24d4204SJose E. Roman default: 748*d24d4204SJose E. Roman SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 749*d24d4204SJose E. Roman break; 750*d24d4204SJose E. Roman } 751*d24d4204SJose E. Roman PetscFunctionReturn(0); 752*d24d4204SJose E. Roman } 753*d24d4204SJose E. Roman 754*d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 755*d24d4204SJose E. Roman { 756*d24d4204SJose E. Roman PetscErrorCode ierr; 757*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 758*d24d4204SJose E. Roman PetscBLASInt one=1,info; 759*d24d4204SJose E. Roman 760*d24d4204SJose E. Roman PetscFunctionBegin; 761*d24d4204SJose E. Roman if (!a->pivots) { 762*d24d4204SJose E. Roman ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 763*d24d4204SJose E. Roman ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 764*d24d4204SJose E. Roman } 765*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 766*d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf",info); 767*d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU; 768*d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 769*d24d4204SJose E. Roman 770*d24d4204SJose E. Roman ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 771*d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 772*d24d4204SJose E. Roman PetscFunctionReturn(0); 773*d24d4204SJose E. Roman } 774*d24d4204SJose E. Roman 775*d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 776*d24d4204SJose E. Roman { 777*d24d4204SJose E. Roman PetscErrorCode ierr; 778*d24d4204SJose E. Roman 779*d24d4204SJose E. Roman PetscFunctionBegin; 780*d24d4204SJose E. Roman ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 781*d24d4204SJose E. Roman ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 782*d24d4204SJose E. Roman PetscFunctionReturn(0); 783*d24d4204SJose E. Roman } 784*d24d4204SJose E. Roman 785*d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 786*d24d4204SJose E. Roman { 787*d24d4204SJose E. Roman PetscFunctionBegin; 788*d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 789*d24d4204SJose E. Roman PetscFunctionReturn(0); 790*d24d4204SJose E. Roman } 791*d24d4204SJose E. Roman 792*d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 793*d24d4204SJose E. Roman { 794*d24d4204SJose E. Roman PetscErrorCode ierr; 795*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 796*d24d4204SJose E. Roman PetscBLASInt one=1,info; 797*d24d4204SJose E. Roman 798*d24d4204SJose E. Roman PetscFunctionBegin; 799*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 800*d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf",info); 801*d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY; 802*d24d4204SJose E. Roman A->assembled = PETSC_TRUE; 803*d24d4204SJose E. Roman 804*d24d4204SJose E. Roman ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 805*d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 806*d24d4204SJose E. Roman PetscFunctionReturn(0); 807*d24d4204SJose E. Roman } 808*d24d4204SJose E. Roman 809*d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 810*d24d4204SJose E. Roman { 811*d24d4204SJose E. Roman PetscErrorCode ierr; 812*d24d4204SJose E. Roman 813*d24d4204SJose E. Roman PetscFunctionBegin; 814*d24d4204SJose E. Roman ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 815*d24d4204SJose E. Roman ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 816*d24d4204SJose E. Roman PetscFunctionReturn(0); 817*d24d4204SJose E. Roman } 818*d24d4204SJose E. Roman 819*d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 820*d24d4204SJose E. Roman { 821*d24d4204SJose E. Roman PetscFunctionBegin; 822*d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 823*d24d4204SJose E. Roman PetscFunctionReturn(0); 824*d24d4204SJose E. Roman } 825*d24d4204SJose E. Roman 826*d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 827*d24d4204SJose E. Roman { 828*d24d4204SJose E. Roman PetscFunctionBegin; 829*d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK; 830*d24d4204SJose E. Roman PetscFunctionReturn(0); 831*d24d4204SJose E. Roman } 832*d24d4204SJose E. Roman 833*d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 834*d24d4204SJose E. Roman { 835*d24d4204SJose E. Roman Mat B; 836*d24d4204SJose E. Roman PetscErrorCode ierr; 837*d24d4204SJose E. Roman 838*d24d4204SJose E. Roman PetscFunctionBegin; 839*d24d4204SJose E. Roman /* Create the factorization matrix */ 840*d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 841*d24d4204SJose E. Roman ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 842*d24d4204SJose E. Roman ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr); 843*d24d4204SJose E. Roman ierr = MatSetUp(B);CHKERRQ(ierr); 844*d24d4204SJose E. Roman B->factortype = ftype; 845*d24d4204SJose E. Roman ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 846*d24d4204SJose E. Roman ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 847*d24d4204SJose E. Roman 848*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 849*d24d4204SJose E. Roman *F = B; 850*d24d4204SJose E. Roman PetscFunctionReturn(0); 851*d24d4204SJose E. Roman } 852*d24d4204SJose E. Roman 853*d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 854*d24d4204SJose E. Roman { 855*d24d4204SJose E. Roman PetscErrorCode ierr; 856*d24d4204SJose E. Roman 857*d24d4204SJose E. Roman PetscFunctionBegin; 858*d24d4204SJose E. Roman ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 859*d24d4204SJose E. Roman ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 860*d24d4204SJose E. Roman PetscFunctionReturn(0); 861*d24d4204SJose E. Roman } 862*d24d4204SJose E. Roman 863*d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 864*d24d4204SJose E. Roman { 865*d24d4204SJose E. Roman PetscErrorCode ierr; 866*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 867*d24d4204SJose E. Roman PetscBLASInt one=1,lwork=0; 868*d24d4204SJose E. Roman const char *ntype; 869*d24d4204SJose E. Roman PetscScalar *work,dummy; 870*d24d4204SJose E. Roman 871*d24d4204SJose E. Roman PetscFunctionBegin; 872*d24d4204SJose E. Roman switch (type){ 873*d24d4204SJose E. Roman case NORM_1: 874*d24d4204SJose E. Roman ntype = "1"; 875*d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 876*d24d4204SJose E. Roman break; 877*d24d4204SJose E. Roman case NORM_FROBENIUS: 878*d24d4204SJose E. Roman ntype = "F"; 879*d24d4204SJose E. Roman work = &dummy; 880*d24d4204SJose E. Roman break; 881*d24d4204SJose E. Roman case NORM_INFINITY: 882*d24d4204SJose E. Roman ntype = "I"; 883*d24d4204SJose E. Roman lwork = PetscMax(a->locr,a->locc); 884*d24d4204SJose E. Roman break; 885*d24d4204SJose E. Roman default: 886*d24d4204SJose E. Roman SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 887*d24d4204SJose E. Roman } 888*d24d4204SJose E. Roman if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 889*d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 890*d24d4204SJose E. Roman if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 891*d24d4204SJose E. Roman PetscFunctionReturn(0); 892*d24d4204SJose E. Roman } 893*d24d4204SJose E. Roman 894*d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 895*d24d4204SJose E. Roman { 896*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 897*d24d4204SJose E. Roman PetscErrorCode ierr; 898*d24d4204SJose E. Roman 899*d24d4204SJose E. Roman PetscFunctionBegin; 900*d24d4204SJose E. Roman ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 901*d24d4204SJose E. Roman PetscFunctionReturn(0); 902*d24d4204SJose E. Roman } 903*d24d4204SJose E. Roman 904*d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 905*d24d4204SJose E. Roman { 906*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 907*d24d4204SJose E. Roman PetscErrorCode ierr; 908*d24d4204SJose E. Roman PetscInt i,n,nb,isrc,nproc,iproc,*idx; 909*d24d4204SJose E. Roman 910*d24d4204SJose E. Roman PetscFunctionBegin; 911*d24d4204SJose E. Roman if (rows) { 912*d24d4204SJose E. Roman n = a->locr; 913*d24d4204SJose E. Roman nb = a->mb; 914*d24d4204SJose E. Roman isrc = a->rsrc; 915*d24d4204SJose E. Roman nproc = a->grid->nprow; 916*d24d4204SJose E. Roman iproc = a->grid->myrow; 917*d24d4204SJose E. Roman ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 918*d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 919*d24d4204SJose E. Roman ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 920*d24d4204SJose E. Roman } 921*d24d4204SJose E. Roman if (cols) { 922*d24d4204SJose E. Roman n = a->locc; 923*d24d4204SJose E. Roman nb = a->nb; 924*d24d4204SJose E. Roman isrc = a->csrc; 925*d24d4204SJose E. Roman nproc = a->grid->npcol; 926*d24d4204SJose E. Roman iproc = a->grid->mycol; 927*d24d4204SJose E. Roman ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 928*d24d4204SJose E. Roman for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 929*d24d4204SJose E. Roman ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 930*d24d4204SJose E. Roman } 931*d24d4204SJose E. Roman PetscFunctionReturn(0); 932*d24d4204SJose E. Roman } 933*d24d4204SJose E. Roman 934*d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 935*d24d4204SJose E. Roman { 936*d24d4204SJose E. Roman PetscErrorCode ierr; 937*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 938*d24d4204SJose E. Roman Mat Bmpi; 939*d24d4204SJose E. Roman MPI_Comm comm; 940*d24d4204SJose E. Roman const PetscInt *ranges; 941*d24d4204SJose E. Roman PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 942*d24d4204SJose E. Roman PetscScalar *barray; 943*d24d4204SJose E. Roman 944*d24d4204SJose E. Roman PetscFunctionBegin; 945*d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 946*d24d4204SJose E. Roman 947*d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 948*d24d4204SJose E. Roman else { 949*d24d4204SJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 950*d24d4204SJose E. Roman ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 951*d24d4204SJose E. Roman ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 952*d24d4204SJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 953*d24d4204SJose E. Roman } 954*d24d4204SJose E. Roman 955*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */ 956*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 957*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 958*d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 959*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 960*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 961*d24d4204SJose E. Roman 962*d24d4204SJose E. Roman /* redistribute matrix */ 963*d24d4204SJose E. Roman ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 964*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 965*d24d4204SJose E. Roman ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 966*d24d4204SJose E. Roman 967*d24d4204SJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968*d24d4204SJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 969*d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 970*d24d4204SJose E. Roman ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 971*d24d4204SJose E. Roman } else *B = Bmpi; 972*d24d4204SJose E. Roman PetscFunctionReturn(0); 973*d24d4204SJose E. Roman } 974*d24d4204SJose E. Roman 975*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 976*d24d4204SJose E. Roman { 977*d24d4204SJose E. Roman PetscErrorCode ierr; 978*d24d4204SJose E. Roman Mat_ScaLAPACK *b; 979*d24d4204SJose E. Roman Mat Bmpi; 980*d24d4204SJose E. Roman MPI_Comm comm; 981*d24d4204SJose E. Roman const PetscInt *ranges; 982*d24d4204SJose E. Roman PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 983*d24d4204SJose E. Roman PetscScalar *aarray; 984*d24d4204SJose E. Roman 985*d24d4204SJose E. Roman PetscFunctionBegin; 986*d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 987*d24d4204SJose E. Roman 988*d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 989*d24d4204SJose E. Roman else { 990*d24d4204SJose E. Roman ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 991*d24d4204SJose E. Roman ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 992*d24d4204SJose E. Roman ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 993*d24d4204SJose E. Roman ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 994*d24d4204SJose E. Roman } 995*d24d4204SJose E. Roman b = (Mat_ScaLAPACK*)Bmpi->data; 996*d24d4204SJose E. Roman 997*d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */ 998*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 999*d24d4204SJose E. Roman ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1000*d24d4204SJose E. Roman lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 1001*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1002*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1003*d24d4204SJose E. Roman 1004*d24d4204SJose E. Roman /* redistribute matrix */ 1005*d24d4204SJose E. Roman ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1006*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1007*d24d4204SJose E. Roman ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1008*d24d4204SJose E. Roman 1009*d24d4204SJose E. Roman ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1010*d24d4204SJose E. Roman ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1011*d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { 1012*d24d4204SJose E. Roman ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1013*d24d4204SJose E. Roman } else *B = Bmpi; 1014*d24d4204SJose E. Roman PetscFunctionReturn(0); 1015*d24d4204SJose E. Roman } 1016*d24d4204SJose E. Roman 1017*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1018*d24d4204SJose E. Roman { 1019*d24d4204SJose E. Roman Mat mat_scal; 1020*d24d4204SJose E. Roman PetscErrorCode ierr; 1021*d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1022*d24d4204SJose E. Roman const PetscInt *cols; 1023*d24d4204SJose E. Roman const PetscScalar *vals; 1024*d24d4204SJose E. Roman 1025*d24d4204SJose E. Roman PetscFunctionBegin; 1026*d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1027*d24d4204SJose E. Roman mat_scal = *newmat; 1028*d24d4204SJose E. Roman ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1029*d24d4204SJose E. Roman } else { 1030*d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1031*d24d4204SJose E. Roman m = PETSC_DECIDE; 1032*d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1033*d24d4204SJose E. Roman n = PETSC_DECIDE; 1034*d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1035*d24d4204SJose E. Roman ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1036*d24d4204SJose E. Roman ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1037*d24d4204SJose E. Roman ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1038*d24d4204SJose E. Roman } 1039*d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 1040*d24d4204SJose E. Roman ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1041*d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1042*d24d4204SJose E. Roman ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1043*d24d4204SJose E. Roman } 1044*d24d4204SJose E. Roman ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1045*d24d4204SJose E. Roman ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1046*d24d4204SJose E. Roman 1047*d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1048*d24d4204SJose E. Roman else *newmat = mat_scal; 1049*d24d4204SJose E. Roman PetscFunctionReturn(0); 1050*d24d4204SJose E. Roman } 1051*d24d4204SJose E. Roman 1052*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1053*d24d4204SJose E. Roman { 1054*d24d4204SJose E. Roman Mat mat_scal; 1055*d24d4204SJose E. Roman PetscErrorCode ierr; 1056*d24d4204SJose E. Roman PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1057*d24d4204SJose E. Roman const PetscInt *cols; 1058*d24d4204SJose E. Roman const PetscScalar *vals; 1059*d24d4204SJose E. Roman PetscScalar v; 1060*d24d4204SJose E. Roman 1061*d24d4204SJose E. Roman PetscFunctionBegin; 1062*d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) { 1063*d24d4204SJose E. Roman mat_scal = *newmat; 1064*d24d4204SJose E. Roman ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1065*d24d4204SJose E. Roman } else { 1066*d24d4204SJose E. Roman ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1067*d24d4204SJose E. Roman m = PETSC_DECIDE; 1068*d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1069*d24d4204SJose E. Roman n = PETSC_DECIDE; 1070*d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1071*d24d4204SJose E. Roman ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1072*d24d4204SJose E. Roman ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1073*d24d4204SJose E. Roman ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1074*d24d4204SJose E. Roman } 1075*d24d4204SJose E. Roman ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1076*d24d4204SJose E. Roman for (row=rstart;row<rend;row++) { 1077*d24d4204SJose E. Roman ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1078*d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1079*d24d4204SJose E. Roman for (j=0;j<ncols;j++) { /* lower triangular part */ 1080*d24d4204SJose E. Roman if (cols[j] == row) continue; 1081*d24d4204SJose E. Roman v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1082*d24d4204SJose E. Roman ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1083*d24d4204SJose E. Roman } 1084*d24d4204SJose E. Roman ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1085*d24d4204SJose E. Roman } 1086*d24d4204SJose E. Roman ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1087*d24d4204SJose E. Roman ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1088*d24d4204SJose E. Roman ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1089*d24d4204SJose E. Roman 1090*d24d4204SJose E. Roman if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1091*d24d4204SJose E. Roman else *newmat = mat_scal; 1092*d24d4204SJose E. Roman PetscFunctionReturn(0); 1093*d24d4204SJose E. Roman } 1094*d24d4204SJose E. Roman 1095*d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1096*d24d4204SJose E. Roman { 1097*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1098*d24d4204SJose E. Roman PetscErrorCode ierr; 1099*d24d4204SJose E. Roman PetscInt sz=0; 1100*d24d4204SJose E. Roman 1101*d24d4204SJose E. Roman PetscFunctionBegin; 1102*d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1103*d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1104*d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr; 1105*d24d4204SJose E. Roman 1106*d24d4204SJose E. Roman ierr = PetscFree(a->loc);CHKERRQ(ierr); 1107*d24d4204SJose E. Roman ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1108*d24d4204SJose E. Roman ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1109*d24d4204SJose E. Roman ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1110*d24d4204SJose E. Roman 1111*d24d4204SJose E. Roman A->preallocated = PETSC_TRUE; 1112*d24d4204SJose E. Roman PetscFunctionReturn(0); 1113*d24d4204SJose E. Roman } 1114*d24d4204SJose E. Roman 1115*d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1116*d24d4204SJose E. Roman { 1117*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1118*d24d4204SJose E. Roman PetscErrorCode ierr; 1119*d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1120*d24d4204SJose E. Roman PetscBool flg; 1121*d24d4204SJose E. Roman MPI_Comm icomm; 1122*d24d4204SJose E. Roman 1123*d24d4204SJose E. Roman PetscFunctionBegin; 1124*d24d4204SJose E. Roman ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1125*d24d4204SJose E. Roman ierr = PetscFree(a->loc);CHKERRQ(ierr); 1126*d24d4204SJose E. Roman ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1127*d24d4204SJose E. Roman ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1128*d24d4204SJose E. Roman ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1129*d24d4204SJose E. Roman if (--grid->grid_refct == 0) { 1130*d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt); 1131*d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow); 1132*d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol); 1133*d24d4204SJose E. Roman ierr = PetscFree(grid);CHKERRQ(ierr); 1134*d24d4204SJose E. Roman ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr); 1135*d24d4204SJose E. Roman } 1136*d24d4204SJose E. Roman ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1137*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1138*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1139*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1140*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1141*d24d4204SJose E. Roman ierr = PetscFree(A->data);CHKERRQ(ierr); 1142*d24d4204SJose E. Roman PetscFunctionReturn(0); 1143*d24d4204SJose E. Roman } 1144*d24d4204SJose E. Roman 1145*d24d4204SJose E. Roman PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1146*d24d4204SJose E. Roman { 1147*d24d4204SJose E. Roman PetscErrorCode ierr; 1148*d24d4204SJose E. Roman const PetscInt *ranges; 1149*d24d4204SJose E. Roman PetscMPIInt size; 1150*d24d4204SJose E. Roman PetscInt i,n; 1151*d24d4204SJose E. Roman 1152*d24d4204SJose E. Roman PetscFunctionBegin; 1153*d24d4204SJose E. Roman ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr); 1154*d24d4204SJose E. Roman if (size>2) { 1155*d24d4204SJose E. Roman ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1156*d24d4204SJose E. Roman n = ranges[1]-ranges[0]; 1157*d24d4204SJose E. Roman for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1158*d24d4204SJose E. Roman if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1159*d24d4204SJose E. Roman } 1160*d24d4204SJose E. Roman PetscFunctionReturn(0); 1161*d24d4204SJose E. Roman } 1162*d24d4204SJose E. Roman 1163*d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1164*d24d4204SJose E. Roman { 1165*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1166*d24d4204SJose E. Roman PetscErrorCode ierr; 1167*d24d4204SJose E. Roman PetscBLASInt info=0; 1168*d24d4204SJose E. Roman 1169*d24d4204SJose E. Roman PetscFunctionBegin; 1170*d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1171*d24d4204SJose E. Roman ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1172*d24d4204SJose E. Roman 1173*d24d4204SJose E. Roman /* check that the layout is as enforced by MatCreateScaLAPACK */ 1174*d24d4204SJose E. Roman ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1175*d24d4204SJose E. Roman ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1176*d24d4204SJose E. Roman 1177*d24d4204SJose E. Roman /* compute local sizes */ 1178*d24d4204SJose E. Roman ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1179*d24d4204SJose E. Roman ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1180*d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1181*d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1182*d24d4204SJose E. Roman a->lld = PetscMax(1,a->locr); 1183*d24d4204SJose E. Roman 1184*d24d4204SJose E. Roman /* allocate local array */ 1185*d24d4204SJose E. Roman ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1186*d24d4204SJose E. Roman 1187*d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */ 1188*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1189*d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit",info); 1190*d24d4204SJose E. Roman PetscFunctionReturn(0); 1191*d24d4204SJose E. Roman } 1192*d24d4204SJose E. Roman 1193*d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1194*d24d4204SJose E. Roman { 1195*d24d4204SJose E. Roman PetscErrorCode ierr; 1196*d24d4204SJose E. Roman PetscInt nstash,reallocs; 1197*d24d4204SJose E. Roman 1198*d24d4204SJose E. Roman PetscFunctionBegin; 1199*d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1200*d24d4204SJose E. Roman ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1201*d24d4204SJose E. Roman ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1202*d24d4204SJose E. Roman ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1203*d24d4204SJose E. Roman PetscFunctionReturn(0); 1204*d24d4204SJose E. Roman } 1205*d24d4204SJose E. Roman 1206*d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1207*d24d4204SJose E. Roman { 1208*d24d4204SJose E. Roman PetscErrorCode ierr; 1209*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1210*d24d4204SJose E. Roman PetscMPIInt n; 1211*d24d4204SJose E. Roman PetscInt i,flg,*row,*col; 1212*d24d4204SJose E. Roman PetscScalar *val; 1213*d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1214*d24d4204SJose E. Roman 1215*d24d4204SJose E. Roman PetscFunctionBegin; 1216*d24d4204SJose E. Roman if (A->nooffprocentries) PetscFunctionReturn(0); 1217*d24d4204SJose E. Roman while (1) { 1218*d24d4204SJose E. Roman ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1219*d24d4204SJose E. Roman if (!flg) break; 1220*d24d4204SJose E. Roman for (i=0;i<n;i++) { 1221*d24d4204SJose E. Roman ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1222*d24d4204SJose E. Roman ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1223*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1224*d24d4204SJose E. Roman if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process"); 1225*d24d4204SJose E. Roman switch (A->insertmode) { 1226*d24d4204SJose E. Roman case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1227*d24d4204SJose E. Roman case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1228*d24d4204SJose E. Roman default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1229*d24d4204SJose E. Roman } 1230*d24d4204SJose E. Roman } 1231*d24d4204SJose E. Roman } 1232*d24d4204SJose E. Roman ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1233*d24d4204SJose E. Roman PetscFunctionReturn(0); 1234*d24d4204SJose E. Roman } 1235*d24d4204SJose E. Roman 1236*d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1237*d24d4204SJose E. Roman { 1238*d24d4204SJose E. Roman PetscErrorCode ierr; 1239*d24d4204SJose E. Roman Mat Adense,As; 1240*d24d4204SJose E. Roman MPI_Comm comm; 1241*d24d4204SJose E. Roman 1242*d24d4204SJose E. Roman PetscFunctionBegin; 1243*d24d4204SJose E. Roman ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1244*d24d4204SJose E. Roman ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1245*d24d4204SJose E. Roman ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1246*d24d4204SJose E. Roman ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1247*d24d4204SJose E. Roman ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1248*d24d4204SJose E. Roman ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1249*d24d4204SJose E. Roman ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1250*d24d4204SJose E. Roman PetscFunctionReturn(0); 1251*d24d4204SJose E. Roman } 1252*d24d4204SJose E. Roman 1253*d24d4204SJose E. Roman /* -------------------------------------------------------------------*/ 1254*d24d4204SJose E. Roman static struct _MatOps MatOps_Values = { 1255*d24d4204SJose E. Roman MatSetValues_ScaLAPACK, 1256*d24d4204SJose E. Roman 0, 1257*d24d4204SJose E. Roman 0, 1258*d24d4204SJose E. Roman MatMult_ScaLAPACK, 1259*d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK, 1260*d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK, 1261*d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK, 1262*d24d4204SJose E. Roman MatSolve_ScaLAPACK, 1263*d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK, 1264*d24d4204SJose E. Roman 0, 1265*d24d4204SJose E. Roman /*10*/ 0, 1266*d24d4204SJose E. Roman MatLUFactor_ScaLAPACK, 1267*d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK, 1268*d24d4204SJose E. Roman 0, 1269*d24d4204SJose E. Roman MatTranspose_ScaLAPACK, 1270*d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK, 1271*d24d4204SJose E. Roman 0, 1272*d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK, 1273*d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK, 1274*d24d4204SJose E. Roman MatNorm_ScaLAPACK, 1275*d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK, 1276*d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK, 1277*d24d4204SJose E. Roman MatSetOption_ScaLAPACK, 1278*d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK, 1279*d24d4204SJose E. Roman /*24*/ 0, 1280*d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK, 1281*d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK, 1282*d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK, 1283*d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK, 1284*d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK, 1285*d24d4204SJose E. Roman 0, 1286*d24d4204SJose E. Roman 0, 1287*d24d4204SJose E. Roman 0, 1288*d24d4204SJose E. Roman 0, 1289*d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK, 1290*d24d4204SJose E. Roman 0, 1291*d24d4204SJose E. Roman 0, 1292*d24d4204SJose E. Roman 0, 1293*d24d4204SJose E. Roman 0, 1294*d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK, 1295*d24d4204SJose E. Roman 0, 1296*d24d4204SJose E. Roman 0, 1297*d24d4204SJose E. Roman 0, 1298*d24d4204SJose E. Roman MatCopy_ScaLAPACK, 1299*d24d4204SJose E. Roman /*44*/ 0, 1300*d24d4204SJose E. Roman MatScale_ScaLAPACK, 1301*d24d4204SJose E. Roman MatShift_ScaLAPACK, 1302*d24d4204SJose E. Roman 0, 1303*d24d4204SJose E. Roman 0, 1304*d24d4204SJose E. Roman /*49*/ 0, 1305*d24d4204SJose E. Roman 0, 1306*d24d4204SJose E. Roman 0, 1307*d24d4204SJose E. Roman 0, 1308*d24d4204SJose E. Roman 0, 1309*d24d4204SJose E. Roman /*54*/ 0, 1310*d24d4204SJose E. Roman 0, 1311*d24d4204SJose E. Roman 0, 1312*d24d4204SJose E. Roman 0, 1313*d24d4204SJose E. Roman 0, 1314*d24d4204SJose E. Roman /*59*/ 0, 1315*d24d4204SJose E. Roman MatDestroy_ScaLAPACK, 1316*d24d4204SJose E. Roman MatView_ScaLAPACK, 1317*d24d4204SJose E. Roman 0, 1318*d24d4204SJose E. Roman 0, 1319*d24d4204SJose E. Roman /*64*/ 0, 1320*d24d4204SJose E. Roman 0, 1321*d24d4204SJose E. Roman 0, 1322*d24d4204SJose E. Roman 0, 1323*d24d4204SJose E. Roman 0, 1324*d24d4204SJose E. Roman /*69*/ 0, 1325*d24d4204SJose E. Roman 0, 1326*d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense, 1327*d24d4204SJose E. Roman 0, 1328*d24d4204SJose E. Roman 0, 1329*d24d4204SJose E. Roman /*74*/ 0, 1330*d24d4204SJose E. Roman 0, 1331*d24d4204SJose E. Roman 0, 1332*d24d4204SJose E. Roman 0, 1333*d24d4204SJose E. Roman 0, 1334*d24d4204SJose E. Roman /*79*/ 0, 1335*d24d4204SJose E. Roman 0, 1336*d24d4204SJose E. Roman 0, 1337*d24d4204SJose E. Roman 0, 1338*d24d4204SJose E. Roman MatLoad_ScaLAPACK, 1339*d24d4204SJose E. Roman /*84*/ 0, 1340*d24d4204SJose E. Roman 0, 1341*d24d4204SJose E. Roman 0, 1342*d24d4204SJose E. Roman 0, 1343*d24d4204SJose E. Roman 0, 1344*d24d4204SJose E. Roman /*89*/ 0, 1345*d24d4204SJose E. Roman 0, 1346*d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK, 1347*d24d4204SJose E. Roman 0, 1348*d24d4204SJose E. Roman 0, 1349*d24d4204SJose E. Roman /*94*/ 0, 1350*d24d4204SJose E. Roman 0, 1351*d24d4204SJose E. Roman 0, 1352*d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK, 1353*d24d4204SJose E. Roman 0, 1354*d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK, 1355*d24d4204SJose E. Roman 0, 1356*d24d4204SJose E. Roman 0, 1357*d24d4204SJose E. Roman MatConjugate_ScaLAPACK, 1358*d24d4204SJose E. Roman 0, 1359*d24d4204SJose E. Roman /*104*/0, 1360*d24d4204SJose E. Roman 0, 1361*d24d4204SJose E. Roman 0, 1362*d24d4204SJose E. Roman 0, 1363*d24d4204SJose E. Roman 0, 1364*d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK, 1365*d24d4204SJose E. Roman 0, 1366*d24d4204SJose E. Roman 0, 1367*d24d4204SJose E. Roman 0, 1368*d24d4204SJose E. Roman MatMissingDiagonal_ScaLAPACK, 1369*d24d4204SJose E. Roman /*114*/0, 1370*d24d4204SJose E. Roman 0, 1371*d24d4204SJose E. Roman 0, 1372*d24d4204SJose E. Roman 0, 1373*d24d4204SJose E. Roman 0, 1374*d24d4204SJose E. Roman /*119*/0, 1375*d24d4204SJose E. Roman MatHermitianTranspose_ScaLAPACK, 1376*d24d4204SJose E. Roman 0, 1377*d24d4204SJose E. Roman 0, 1378*d24d4204SJose E. Roman 0, 1379*d24d4204SJose E. Roman /*124*/0, 1380*d24d4204SJose E. Roman 0, 1381*d24d4204SJose E. Roman 0, 1382*d24d4204SJose E. Roman 0, 1383*d24d4204SJose E. Roman 0, 1384*d24d4204SJose E. Roman /*129*/0, 1385*d24d4204SJose E. Roman 0, 1386*d24d4204SJose E. Roman 0, 1387*d24d4204SJose E. Roman 0, 1388*d24d4204SJose E. Roman 0, 1389*d24d4204SJose E. Roman /*134*/0, 1390*d24d4204SJose E. Roman 0, 1391*d24d4204SJose E. Roman 0, 1392*d24d4204SJose E. Roman 0, 1393*d24d4204SJose E. Roman 0, 1394*d24d4204SJose E. Roman 0, 1395*d24d4204SJose E. Roman /*140*/0, 1396*d24d4204SJose E. Roman 0, 1397*d24d4204SJose E. Roman 0, 1398*d24d4204SJose E. Roman 0, 1399*d24d4204SJose E. Roman 0, 1400*d24d4204SJose E. Roman /*145*/0, 1401*d24d4204SJose E. Roman 0, 1402*d24d4204SJose E. Roman 0 1403*d24d4204SJose E. Roman }; 1404*d24d4204SJose E. Roman 1405*d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1406*d24d4204SJose E. Roman { 1407*d24d4204SJose E. Roman PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1408*d24d4204SJose E. Roman PetscInt size=stash->size,nsends; 1409*d24d4204SJose E. Roman PetscErrorCode ierr; 1410*d24d4204SJose E. Roman PetscInt count,*sindices,**rindices,i,j,l; 1411*d24d4204SJose E. Roman PetscScalar **rvalues,*svalues; 1412*d24d4204SJose E. Roman MPI_Comm comm = stash->comm; 1413*d24d4204SJose E. Roman MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1414*d24d4204SJose E. Roman PetscMPIInt *sizes,*nlengths,nreceives; 1415*d24d4204SJose E. Roman PetscInt *sp_idx,*sp_idy; 1416*d24d4204SJose E. Roman PetscScalar *sp_val; 1417*d24d4204SJose E. Roman PetscMatStashSpace space,space_next; 1418*d24d4204SJose E. Roman PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1419*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1420*d24d4204SJose E. Roman 1421*d24d4204SJose E. Roman PetscFunctionBegin; 1422*d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1423*d24d4204SJose E. Roman InsertMode addv; 1424*d24d4204SJose E. Roman ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425*d24d4204SJose E. Roman if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1426*d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */ 1427*d24d4204SJose E. Roman } 1428*d24d4204SJose E. Roman 1429*d24d4204SJose E. Roman bs2 = stash->bs*stash->bs; 1430*d24d4204SJose E. Roman 1431*d24d4204SJose E. Roman /* first count number of contributors to each processor */ 1432*d24d4204SJose E. Roman ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1433*d24d4204SJose E. Roman ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1434*d24d4204SJose E. Roman 1435*d24d4204SJose E. Roman i = j = 0; 1436*d24d4204SJose E. Roman space = stash->space_head; 1437*d24d4204SJose E. Roman while (space) { 1438*d24d4204SJose E. Roman space_next = space->next; 1439*d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1440*d24d4204SJose E. Roman ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1441*d24d4204SJose E. Roman ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1442*d24d4204SJose E. Roman PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1443*d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1444*d24d4204SJose E. Roman nlengths[j]++; owner[i] = j; 1445*d24d4204SJose E. Roman i++; 1446*d24d4204SJose E. Roman } 1447*d24d4204SJose E. Roman space = space_next; 1448*d24d4204SJose E. Roman } 1449*d24d4204SJose E. Roman 1450*d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */ 1451*d24d4204SJose E. Roman ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1452*d24d4204SJose E. Roman for (i=0, nsends=0; i<size; i++) { 1453*d24d4204SJose E. Roman if (nlengths[i]) { 1454*d24d4204SJose E. Roman sizes[i] = 1; nsends++; 1455*d24d4204SJose E. Roman } 1456*d24d4204SJose E. Roman } 1457*d24d4204SJose E. Roman 1458*d24d4204SJose E. Roman {PetscMPIInt *onodes,*olengths; 1459*d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */ 1460*d24d4204SJose E. Roman ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1461*d24d4204SJose E. Roman ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1462*d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */ 1463*d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] *=2; 1464*d24d4204SJose E. Roman ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1465*d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1466*d24d4204SJose E. Roman for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1467*d24d4204SJose E. Roman ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1468*d24d4204SJose E. Roman ierr = PetscFree(onodes);CHKERRQ(ierr); 1469*d24d4204SJose E. Roman ierr = PetscFree(olengths);CHKERRQ(ierr);} 1470*d24d4204SJose E. Roman 1471*d24d4204SJose E. Roman /* do sends: 1472*d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to 1473*d24d4204SJose E. Roman the ith processor 1474*d24d4204SJose E. Roman */ 1475*d24d4204SJose E. Roman ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1476*d24d4204SJose E. Roman ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1477*d24d4204SJose E. Roman ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1478*d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */ 1479*d24d4204SJose E. Roman startv[0] = 0; starti[0] = 0; 1480*d24d4204SJose E. Roman for (i=1; i<size; i++) { 1481*d24d4204SJose E. Roman startv[i] = startv[i-1] + nlengths[i-1]; 1482*d24d4204SJose E. Roman starti[i] = starti[i-1] + 2*nlengths[i-1]; 1483*d24d4204SJose E. Roman } 1484*d24d4204SJose E. Roman 1485*d24d4204SJose E. Roman i = 0; 1486*d24d4204SJose E. Roman space = stash->space_head; 1487*d24d4204SJose E. Roman while (space) { 1488*d24d4204SJose E. Roman space_next = space->next; 1489*d24d4204SJose E. Roman sp_idx = space->idx; 1490*d24d4204SJose E. Roman sp_idy = space->idy; 1491*d24d4204SJose E. Roman sp_val = space->val; 1492*d24d4204SJose E. Roman for (l=0; l<space->local_used; l++) { 1493*d24d4204SJose E. Roman j = owner[i]; 1494*d24d4204SJose E. Roman if (bs2 == 1) { 1495*d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l]; 1496*d24d4204SJose E. Roman } else { 1497*d24d4204SJose E. Roman PetscInt k; 1498*d24d4204SJose E. Roman PetscScalar *buf1,*buf2; 1499*d24d4204SJose E. Roman buf1 = svalues+bs2*startv[j]; 1500*d24d4204SJose E. Roman buf2 = space->val + bs2*l; 1501*d24d4204SJose E. Roman for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1502*d24d4204SJose E. Roman } 1503*d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l]; 1504*d24d4204SJose E. Roman sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1505*d24d4204SJose E. Roman startv[j]++; 1506*d24d4204SJose E. Roman starti[j]++; 1507*d24d4204SJose E. Roman i++; 1508*d24d4204SJose E. Roman } 1509*d24d4204SJose E. Roman space = space_next; 1510*d24d4204SJose E. Roman } 1511*d24d4204SJose E. Roman startv[0] = 0; 1512*d24d4204SJose E. Roman for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1513*d24d4204SJose E. Roman 1514*d24d4204SJose E. Roman for (i=0,count=0; i<size; i++) { 1515*d24d4204SJose E. Roman if (sizes[i]) { 1516*d24d4204SJose E. Roman ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 1517*d24d4204SJose E. Roman ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 1518*d24d4204SJose E. Roman } 1519*d24d4204SJose E. Roman } 1520*d24d4204SJose E. Roman #if defined(PETSC_USE_INFO) 1521*d24d4204SJose E. Roman ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1522*d24d4204SJose E. Roman for (i=0; i<size; i++) { 1523*d24d4204SJose E. Roman if (sizes[i]) { 1524*d24d4204SJose E. Roman ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1525*d24d4204SJose E. Roman } 1526*d24d4204SJose E. Roman } 1527*d24d4204SJose E. Roman #endif 1528*d24d4204SJose E. Roman ierr = PetscFree(nlengths);CHKERRQ(ierr); 1529*d24d4204SJose E. Roman ierr = PetscFree(owner);CHKERRQ(ierr); 1530*d24d4204SJose E. Roman ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1531*d24d4204SJose E. Roman ierr = PetscFree(sizes);CHKERRQ(ierr); 1532*d24d4204SJose E. Roman 1533*d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1534*d24d4204SJose E. Roman ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1535*d24d4204SJose E. Roman 1536*d24d4204SJose E. Roman for (i=0; i<nreceives; i++) { 1537*d24d4204SJose E. Roman recv_waits[2*i] = recv_waits1[i]; 1538*d24d4204SJose E. Roman recv_waits[2*i+1] = recv_waits2[i]; 1539*d24d4204SJose E. Roman } 1540*d24d4204SJose E. Roman stash->recv_waits = recv_waits; 1541*d24d4204SJose E. Roman 1542*d24d4204SJose E. Roman ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1543*d24d4204SJose E. Roman ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1544*d24d4204SJose E. Roman 1545*d24d4204SJose E. Roman stash->svalues = svalues; 1546*d24d4204SJose E. Roman stash->sindices = sindices; 1547*d24d4204SJose E. Roman stash->rvalues = rvalues; 1548*d24d4204SJose E. Roman stash->rindices = rindices; 1549*d24d4204SJose E. Roman stash->send_waits = send_waits; 1550*d24d4204SJose E. Roman stash->nsends = nsends; 1551*d24d4204SJose E. Roman stash->nrecvs = nreceives; 1552*d24d4204SJose E. Roman stash->reproduce_count = 0; 1553*d24d4204SJose E. Roman PetscFunctionReturn(0); 1554*d24d4204SJose E. Roman } 1555*d24d4204SJose E. Roman 1556*d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1557*d24d4204SJose E. Roman { 1558*d24d4204SJose E. Roman PetscErrorCode ierr; 1559*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1560*d24d4204SJose E. Roman 1561*d24d4204SJose E. Roman PetscFunctionBegin; 1562*d24d4204SJose E. Roman if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1563*d24d4204SJose E. Roman if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1564*d24d4204SJose E. Roman if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1565*d24d4204SJose E. Roman ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1566*d24d4204SJose E. Roman ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1567*d24d4204SJose E. Roman PetscFunctionReturn(0); 1568*d24d4204SJose E. Roman } 1569*d24d4204SJose E. Roman 1570*d24d4204SJose E. Roman /*@ 1571*d24d4204SJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1572*d24d4204SJose E. Roman the ScaLAPACK matrix 1573*d24d4204SJose E. Roman 1574*d24d4204SJose E. Roman Logically Collective on A 1575*d24d4204SJose E. Roman 1576*d24d4204SJose E. Roman Input Parameter: 1577*d24d4204SJose E. Roman + A - a MATSCALAPACK matrix 1578*d24d4204SJose E. Roman . mb - the row block size 1579*d24d4204SJose E. Roman - nb - the column block size 1580*d24d4204SJose E. Roman 1581*d24d4204SJose E. Roman Level: intermediate 1582*d24d4204SJose E. Roman 1583*d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1584*d24d4204SJose E. Roman @*/ 1585*d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1586*d24d4204SJose E. Roman { 1587*d24d4204SJose E. Roman PetscErrorCode ierr; 1588*d24d4204SJose E. Roman 1589*d24d4204SJose E. Roman PetscFunctionBegin; 1590*d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1591*d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,mb,2); 1592*d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A,nb,3); 1593*d24d4204SJose E. Roman ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1594*d24d4204SJose E. Roman PetscFunctionReturn(0); 1595*d24d4204SJose E. Roman } 1596*d24d4204SJose E. Roman 1597*d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1598*d24d4204SJose E. Roman { 1599*d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1600*d24d4204SJose E. Roman 1601*d24d4204SJose E. Roman PetscFunctionBegin; 1602*d24d4204SJose E. Roman if (mb) *mb = a->mb; 1603*d24d4204SJose E. Roman if (nb) *nb = a->nb; 1604*d24d4204SJose E. Roman PetscFunctionReturn(0); 1605*d24d4204SJose E. Roman } 1606*d24d4204SJose E. Roman 1607*d24d4204SJose E. Roman /*@ 1608*d24d4204SJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1609*d24d4204SJose E. Roman the ScaLAPACK matrix 1610*d24d4204SJose E. Roman 1611*d24d4204SJose E. Roman Not collective 1612*d24d4204SJose E. Roman 1613*d24d4204SJose E. Roman Input Parameter: 1614*d24d4204SJose E. Roman . A - a MATSCALAPACK matrix 1615*d24d4204SJose E. Roman 1616*d24d4204SJose E. Roman Output Parameters: 1617*d24d4204SJose E. Roman + mb - the row block size 1618*d24d4204SJose E. Roman - nb - the column block size 1619*d24d4204SJose E. Roman 1620*d24d4204SJose E. Roman Level: intermediate 1621*d24d4204SJose E. Roman 1622*d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1623*d24d4204SJose E. Roman @*/ 1624*d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1625*d24d4204SJose E. Roman { 1626*d24d4204SJose E. Roman PetscErrorCode ierr; 1627*d24d4204SJose E. Roman 1628*d24d4204SJose E. Roman PetscFunctionBegin; 1629*d24d4204SJose E. Roman PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1630*d24d4204SJose E. Roman ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1631*d24d4204SJose E. Roman PetscFunctionReturn(0); 1632*d24d4204SJose E. Roman } 1633*d24d4204SJose E. Roman 1634*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1635*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1636*d24d4204SJose E. Roman 1637*d24d4204SJose E. Roman /*MC 1638*d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1639*d24d4204SJose E. Roman 1640*d24d4204SJose E. Roman Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1641*d24d4204SJose E. Roman 1642*d24d4204SJose E. Roman Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1643*d24d4204SJose E. Roman 1644*d24d4204SJose E. Roman Options Database Keys: 1645*d24d4204SJose E. Roman + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1646*d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1647*d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1648*d24d4204SJose E. Roman 1649*d24d4204SJose E. Roman Level: beginner 1650*d24d4204SJose E. Roman 1651*d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL 1652*d24d4204SJose E. Roman M*/ 1653*d24d4204SJose E. Roman 1654*d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1655*d24d4204SJose E. Roman { 1656*d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1657*d24d4204SJose E. Roman PetscErrorCode ierr; 1658*d24d4204SJose E. Roman PetscBool flg,flg1; 1659*d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid; 1660*d24d4204SJose E. Roman MPI_Comm icomm; 1661*d24d4204SJose E. Roman PetscBLASInt nprow,npcol,myrow,mycol; 1662*d24d4204SJose E. Roman PetscInt optv1,k=2,array[2]={0,0}; 1663*d24d4204SJose E. Roman PetscMPIInt size; 1664*d24d4204SJose E. Roman 1665*d24d4204SJose E. Roman PetscFunctionBegin; 1666*d24d4204SJose E. Roman ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1667*d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES; 1668*d24d4204SJose E. Roman 1669*d24d4204SJose E. Roman ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1670*d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1671*d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1672*d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1673*d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL; 1674*d24d4204SJose E. Roman 1675*d24d4204SJose E. Roman ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1676*d24d4204SJose E. Roman A->data = (void*)a; 1677*d24d4204SJose E. Roman 1678*d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1679*d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1680*d24d4204SJose E. Roman ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr); 1681*d24d4204SJose E. Roman } 1682*d24d4204SJose E. Roman ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1683*d24d4204SJose E. Roman ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1684*d24d4204SJose E. Roman if (!flg) { 1685*d24d4204SJose E. Roman ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1686*d24d4204SJose E. Roman 1687*d24d4204SJose E. Roman ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr); 1688*d24d4204SJose E. Roman grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1689*d24d4204SJose E. Roman 1690*d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1691*d24d4204SJose E. Roman ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1692*d24d4204SJose E. Roman if (flg1) { 1693*d24d4204SJose E. Roman if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1694*d24d4204SJose E. Roman grid->nprow = optv1; 1695*d24d4204SJose E. Roman } 1696*d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1697*d24d4204SJose E. Roman 1698*d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1699*d24d4204SJose E. Roman grid->npcol = size/grid->nprow; 1700*d24d4204SJose E. Roman ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1701*d24d4204SJose E. Roman ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1702*d24d4204SJose E. Roman Cblacs_get(-1,0,&grid->ictxt); 1703*d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1704*d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1705*d24d4204SJose E. Roman grid->grid_refct = 1; 1706*d24d4204SJose E. Roman grid->nprow = nprow; 1707*d24d4204SJose E. Roman grid->npcol = npcol; 1708*d24d4204SJose E. Roman grid->myrow = myrow; 1709*d24d4204SJose E. Roman grid->mycol = mycol; 1710*d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1711*d24d4204SJose E. Roman Cblacs_get(-1,0,&grid->ictxrow); 1712*d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1713*d24d4204SJose E. Roman Cblacs_get(-1,0,&grid->ictxcol); 1714*d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1715*d24d4204SJose E. Roman ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr); 1716*d24d4204SJose E. Roman 1717*d24d4204SJose E. Roman } else grid->grid_refct++; 1718*d24d4204SJose E. Roman ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1719*d24d4204SJose E. Roman a->grid = grid; 1720*d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE; 1721*d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE; 1722*d24d4204SJose E. Roman 1723*d24d4204SJose E. Roman ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1724*d24d4204SJose E. Roman ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1725*d24d4204SJose E. Roman if (flg) { 1726*d24d4204SJose E. Roman a->mb = array[0]; 1727*d24d4204SJose E. Roman a->nb = (k>1)? array[1]: a->mb; 1728*d24d4204SJose E. Roman } 1729*d24d4204SJose E. Roman ierr = PetscOptionsEnd();CHKERRQ(ierr); 1730*d24d4204SJose E. Roman 1731*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1732*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1733*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1734*d24d4204SJose E. Roman ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1735*d24d4204SJose E. Roman PetscFunctionReturn(0); 1736*d24d4204SJose E. Roman } 1737*d24d4204SJose E. Roman 1738*d24d4204SJose E. Roman /*@C 1739*d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1740*d24d4204SJose E. Roman (2D block cyclic distribution). 1741*d24d4204SJose E. Roman 1742*d24d4204SJose E. Roman Collective 1743*d24d4204SJose E. Roman 1744*d24d4204SJose E. Roman Input Parameters: 1745*d24d4204SJose E. Roman + comm - MPI communicator 1746*d24d4204SJose E. Roman . mb - row block size (or PETSC_DECIDE to have it set) 1747*d24d4204SJose E. Roman . nb - column block size (or PETSC_DECIDE to have it set) 1748*d24d4204SJose E. Roman . M - number of global rows 1749*d24d4204SJose E. Roman . N - number of global columns 1750*d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix 1751*d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix 1752*d24d4204SJose E. Roman 1753*d24d4204SJose E. Roman Output Parameter: 1754*d24d4204SJose E. Roman . A - the matrix 1755*d24d4204SJose E. Roman 1756*d24d4204SJose E. Roman Options Database Keys: 1757*d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1758*d24d4204SJose E. Roman 1759*d24d4204SJose E. Roman It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1760*d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly. 1761*d24d4204SJose E. Roman [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1762*d24d4204SJose E. Roman 1763*d24d4204SJose E. Roman Notes: 1764*d24d4204SJose E. Roman If PETSC_DECIDE is used for the block sizes, then an appropriate value 1765*d24d4204SJose E. Roman is chosen. 1766*d24d4204SJose E. Roman 1767*d24d4204SJose E. Roman Storage Information: 1768*d24d4204SJose E. Roman Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1769*d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose 1770*d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values 1771*d24d4204SJose E. Roman used for the distributed matrix, not the same meaning as in BAIJ. 1772*d24d4204SJose E. Roman 1773*d24d4204SJose E. Roman Level: intermediate 1774*d24d4204SJose E. Roman 1775*d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1776*d24d4204SJose E. Roman @*/ 1777*d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1778*d24d4204SJose E. Roman { 1779*d24d4204SJose E. Roman PetscErrorCode ierr; 1780*d24d4204SJose E. Roman Mat_ScaLAPACK *a; 1781*d24d4204SJose E. Roman PetscInt m,n; 1782*d24d4204SJose E. Roman 1783*d24d4204SJose E. Roman PetscFunctionBegin; 1784*d24d4204SJose E. Roman ierr = MatCreate(comm,A);CHKERRQ(ierr); 1785*d24d4204SJose E. Roman ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1786*d24d4204SJose E. Roman if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1787*d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1788*d24d4204SJose E. Roman m = PETSC_DECIDE; 1789*d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1790*d24d4204SJose E. Roman n = PETSC_DECIDE; 1791*d24d4204SJose E. Roman ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1792*d24d4204SJose E. Roman ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1793*d24d4204SJose E. Roman a = (Mat_ScaLAPACK*)(*A)->data; 1794*d24d4204SJose E. Roman ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1795*d24d4204SJose E. Roman ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1796*d24d4204SJose E. Roman ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1797*d24d4204SJose E. Roman ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1798*d24d4204SJose E. Roman ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1799*d24d4204SJose E. Roman ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1800*d24d4204SJose E. Roman ierr = MatSetUp(*A);CHKERRQ(ierr); 1801*d24d4204SJose E. Roman PetscFunctionReturn(0); 1802*d24d4204SJose E. Roman } 1803*d24d4204SJose E. Roman 1804