#include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/

const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
"  AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
"            J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
"            G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
"  TITLE = {Sca{LAPACK} Users' Guide},\n"
"  PUBLISHER = {SIAM},\n"
"  ADDRESS = {Philadelphia, PA},\n"
"  YEAR = 1997\n"
"}\n";
static PetscBool ScaLAPACKCite = PETSC_FALSE;

#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
      PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
      PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
      PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }
  /* convert to dense format and call MatView() */
  PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
  PetscCall(MatView(Adense,viewer));
  PetscCall(MatDestroy(&Adense));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscLogDouble isend[2],irecv[2];

  PetscFunctionBegin;
  info->block_size = 1.0;

  isend[0] = a->lld*a->locc;   /* locally allocated */
  isend[1] = a->locr*a->locc;  /* used submatrix */
  if (flag == MAT_LOCAL) {
    info->nz_allocated = isend[0];
    info->nz_used      = isend[1];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    info->nz_allocated = irecv[0];
    info->nz_used      = irecv[1];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
    info->nz_allocated = irecv[0];
    info->nz_used      = irecv[1];
  }

  info->nz_unneeded       = 0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = ((PetscObject)A)->mem;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  switch (op) {
    case MAT_NEW_NONZERO_LOCATIONS:
    case MAT_NEW_NONZERO_LOCATION_ERR:
    case MAT_NEW_NONZERO_ALLOCATION_ERR:
    case MAT_SYMMETRIC:
    case MAT_SORTED_FULL:
    case MAT_HERMITIAN:
      break;
    case MAT_ROW_ORIENTED:
      a->roworiented = flg;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscInt      i,j;
  PetscBLASInt  gridx,gcidx,lridx,lcidx,rsrc,csrc;
  PetscBool     roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i]+1,&gridx));
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx));
      PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        if (roworiented) {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          }
        } else {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break;
          }
        }
      } else {
        PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
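
/*
   Illustrative usage sketch (not part of the PETSc sources): entries may be set
   from any process; values that belong to another process are routed through the
   matrix stash at assembly time, as implemented above. The routines below are
   standard PETSc API; the sizes and indices are made up for the example.

     Mat         A;
     PetscScalar v = 1.0;
     PetscInt    i = 0, j = 0;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100));
     PetscCall(MatSetType(A,MATSCALAPACK));
     PetscCall(MatSetUp(A));
     PetscCall(MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES));   (may be off-process)
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/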

static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {  /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  PetscCall(PetscFree2(x2d,y2d));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
{
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&xarray));
  PetscCall(VecGetArray(y,&yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
  PetscCall(VecRestoreArrayRead(x,&xarray));
  PetscCall(VecRestoreArray(y,&yarray));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
{
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&xarray));
  PetscCall(VecGetArray(y,&yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
  PetscCall(VecRestoreArrayRead(x,&xarray));
  PetscCall(VecRestoreArray(y,&yarray));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
{
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  PetscFunctionBegin;
  if (y != z) PetscCall(VecCopy(y,z));
  PetscCall(VecGetArrayRead(x,&xarray));
  PetscCall(VecGetArray(z,&zarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
  PetscCall(VecRestoreArrayRead(x,&xarray));
  PetscCall(VecRestoreArray(z,&zarray));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
{
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  PetscFunctionBegin;
  if (y != z) PetscCall(VecCopy(y,z));
  PetscCall(VecGetArrayRead(x,&xarray));
  PetscCall(VecGetArray(z,&zarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
  PetscCall(VecRestoreArrayRead(x,&xarray));
  PetscCall(VecRestoreArray(z,&zarray));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATSCALAPACK));
  PetscCall(MatSetUp(C));
  PetscFunctionReturn(0);
}

/* --------------------------------------- */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
    case MATPRODUCT_AB:
      PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
      break;
    case MATPRODUCT_ABt:
      PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
/* --------------------------------------- */
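
/*
   Illustrative sketch (not part of the PETSc sources): with the hooks above, the
   generic MatProduct machinery supports C = A*B and C = A*B^T for two MATSCALAPACK
   matrices, so the usual convenience interface should apply; A and B are assumed
   to be assembled MATSCALAPACK matrices of compatible sizes.

     Mat C;
     PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     ...
     PetscCall(MatDestroy(&C));
*/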

static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *darray,*d2d,v;
  const PetscInt *ranges;
  PetscBLASInt   j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D,&darray));

  if (A->rmap->N<=A->cmap->N) {  /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal */
    for (j=1;j<=a->M;j++) {
      PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else {  /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal */
    for (j=1;j<=a->N;j++) {
      PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D,&darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 480 481 /* broadcast along process columns */ 482 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 483 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 484 485 /* local scaling */ 486 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 487 488 PetscCall(PetscFree(d2d)); 489 PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d)); 490 } 491 if (L) { 492 PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d)); 493 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 494 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 495 PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 496 dlld = PetscMax(1,A->rmap->n); 497 PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 498 PetscCheckScaLapackInfo("descinit",info); 499 500 /* allocate 2d vector */ 501 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 502 PetscCall(PetscCalloc1(lszd,&d2d)); 503 dlld = PetscMax(1,lszd); 504 505 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 506 PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 507 PetscCheckScaLapackInfo("descinit",info); 508 509 /* redistribute d to a column of a 2d matrix */ 510 PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 511 512 /* broadcast along process rows */ 513 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 514 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 515 516 /* local scaling */ 517 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 518 519 PetscCall(PetscFree(d2d)); 520 PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d)); 521 } 522 PetscFunctionReturn(0); 523 } 524 525 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 526 { 527 PetscFunctionBegin; 528 *missing = PETSC_FALSE; 529 PetscFunctionReturn(0); 530 } 531 532 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 533 { 534 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 535 PetscBLASInt n,one=1; 536 537 PetscFunctionBegin; 538 n = x->lld*x->locc; 539 PetscCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 540 PetscFunctionReturn(0); 541 } 542 543 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 544 { 545 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 546 PetscBLASInt i,n; 547 PetscScalar v; 548 549 PetscFunctionBegin; 550 n = PetscMin(x->M,x->N); 551 for (i=1;i<=n;i++) { 552 PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 553 v += alpha; 554 PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 555 } 556 PetscFunctionReturn(0); 557 } 558 559 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 560 { 561 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 562 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 563 PetscBLASInt one=1; 564 PetscScalar beta=1.0; 565 566 PetscFunctionBegin; 567 MatScaLAPACKCheckDistribution(Y,1,X,3); 568 PetscCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 569 

static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt  one=1;
  PetscScalar   beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat           Bs;
  MPI_Comm      comm;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatCreate(comm,&Bs));
  PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(Bs,MATSCALAPACK));
  b = (Mat_ScaLAPACK*)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
  Mat           Bs = *B;
  PetscBLASInt  one=1;
  PetscScalar   sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscInt      i,j;

  PetscFunctionBegin;
  for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
  Mat           Bs = *B;
  PetscBLASInt  one=1;
  PetscScalar   sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B,X));
  PetscCall(VecGetArray(X,&x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  PetscCall(PetscMalloc1(lszx,&x2d));
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X,&x));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A,B,X));
  PetscCall(VecAXPY(X,1,Y));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool     flg1,flg2;
  PetscBLASInt  one=1,info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
  PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt  one=1,info;

  PetscFunctionBegin;
  if (!a->pivots) {
    PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots));
    PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
  }
  PetscCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
  PetscCheckScaLapackInfo("getrf",info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt  one=1,info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat           B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
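
/*
   Illustrative sketch (not part of the PETSc sources): once the solver type is
   registered, a MATSCALAPACK matrix A can be factored and solved either through
   KSP/PC (e.g. -pc_type lu -pc_factor_mat_solver_type scalapack) or directly, as
   below; b and x are assumed to be conforming vectors, and the symbolic step is
   trivial here (see trivialsymbolic above).

     Mat           F;
     MatFactorInfo info;

     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A,MATSOLVERSCALAPACK,MAT_FACTOR_LU,&F));
     PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,&info));
     PetscCall(MatLUFactorNumeric(F,A,&info));
     PetscCall(MatSolve(F,b,x));
     PetscCall(MatDestroy(&F));
*/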

static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt  one=1,lwork=0;
  const char    *ntype;
  PetscScalar   *work=NULL,dummy;

  PetscFunctionBegin;
  switch (type) {
    case NORM_1:
      ntype = "1";
      lwork = PetscMax(a->locr,a->locc);
      break;
    case NORM_FROBENIUS:
      ntype = "F";
      work  = &dummy;
      break;
    case NORM_INFINITY:
      ntype = "I";
      lwork = PetscMax(a->locr,a->locc);
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
  }
  if (lwork) PetscCall(PetscMalloc1(lwork,&work));
  *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc,a->lld*a->locc));
  PetscFunctionReturn(0);
}
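
/*
   Note on the index computation in MatGetOwnershipIS_ScaLAPACK() below: in a
   block-cyclic distribution with block size nb over nproc processes, local entry
   i on process iproc lies in local block i/nb, which is global block
   (i/nb)*nproc + (nproc+iproc-isrc)%nproc. For example (illustrative numbers),
   with nb=2, nproc=3, isrc=0, process 1 owns global blocks 1,4,7,..., i.e.
   global indices 2,3,8,9,14,15,..., which is exactly what the formula yields
   for local i=0,1,2,3,...
*/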

static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscInt      i,n,nb,isrc,nproc,iproc,*idx;

  PetscFunctionBegin;
  if (rows) {
    n     = a->locr;
    nb    = a->mb;
    isrc  = a->rsrc;
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    PetscCall(PetscMalloc1(n,&idx));
    for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
  }
  if (cols) {
    n     = a->locc;
    nb    = a->nb;
    isrc  = a->csrc;
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    PetscCall(PetscMalloc1(n,&idx));
    for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  Mat               Bmpi;
  MPI_Comm          comm;
  PetscInt          i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt    *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt      bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar       *barray;
  PetscBool         differ=PETSC_FALSE;
  PetscMPIInt       size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges));
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));
    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend));
    for (i=rstart;i<rend;i++) {
      PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
      PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
    }
    PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm,&Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
      PetscCall(MatSetSizes(Bmpi,m,n,M,N));
      PetscCall(MatSetType(Bmpi,MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));

    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A,&Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
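
/*
   A 1d PETSc layout can be fed directly to ScaLAPACK's gemr2d redistribution only
   if it is a valid block distribution, i.e. all processes hold the same number of
   local rows (or columns) except possibly the last one, which may hold fewer. The
   helper below checks this property.
*/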

static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm,&size));
  if (size>1) {
    PetscCall(PetscLayoutGetRanges(map,&ranges));
    n = ranges[1]-ranges[0];
    for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break;
    *correct = (PetscBool)(i == size || (i == size-1 && ranges[i+1]-ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges,*rows,*cols;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  IS             ir,ic;
  PetscInt       lda;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(MatDenseGetArray(A,&aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
    lld = PetscMax(lda,1);  /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")",lda,A->rmap->n);
    b->roworiented = PETSC_FALSE;
    PetscCall(MatGetOwnershipIS(A,&ir,&ic));
    PetscCall(ISGetIndices(ir,&rows));
    PetscCall(ISGetIndices(ic,&cols));
    PetscCall(MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir,&rows));
    PetscCall(ISRestoreIndices(ic,&cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A,&aarray));
  PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    PetscCall(MatSetSizes(mat_scal,m,n,M,N));
    PetscCall(MatSetType(mat_scal,MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  for (row=rstart;row<rend;row++) {
    PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
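
/*
   Illustrative sketch (not part of the PETSc sources): with the converters above,
   an assembled matrix of another type can be redistributed to MATSCALAPACK through
   the generic interface; A is assumed to be an assembled MATAIJ (or dense) matrix.

     Mat As;
     PetscCall(MatConvert(A,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
     ...
     PetscCall(MatDestroy(&As));
*/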

PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    PetscCall(MatSetSizes(mat_scal,m,n,M,N));
    PetscCall(MatSetType(mat_scal,MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row=rstart;row<rend;row++) {
    PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscInt      sz=0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld,a->locc,&sz));
  PetscCall(PetscCalloc1(sz,&a->loc));
  PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
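
/*
   Local sizes in MatSetUp_ScaLAPACK() below come from ScaLAPACK's numroc
   ("number of rows or columns"). Worked example (illustrative numbers): with
   M=10 global rows, block size mb=3 and nprow=2 process rows, the row blocks
   {3,3,3,1} are dealt out cyclically, so process row 0 holds blocks 0 and 2
   (locr=6) and process row 1 holds blocks 1 and 3 (locr=4).
*/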

PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt  info=0;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
  PetscCheck(flg,A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
  PetscCheck(flg,A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N,&a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N,&a->N));
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscInt nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt   n;
  PetscInt      i,flg,*row,*col;
  PetscScalar   *val;
  PetscBLASInt  gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
    if (!flg) break;
    for (i=0;i<n;i++) {
      PetscCall(PetscBLASIntCast(row[i]+1,&gridx));
      PetscCall(PetscBLASIntCast(col[i]+1,&gcidx));
      PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
{
  Mat      Adense,As;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
  PetscCall(MatCreate(comm,&Adense));
  PetscCall(MatSetType(Adense,MATDENSE));
  PetscCall(MatLoad(Adense,viewer));
  PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
  PetscCall(MatDestroy(&Adense));
  PetscCall(MatHeaderReplace(newMat,&As));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0,
       0,
       0,
/*150*/0
};

static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size,&nlengths));
  PetscCall(PetscMalloc1(stash->n+1,&owner));

  i     = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx));
      PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx));
      PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size,&sizes));
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
   PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *= 2;
   PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
   /* values are size 'bs2' lengths (and remove earlier factor 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
   PetscCall(PetscFree(onodes));
   PetscCall(PetscFree(olengths));}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
  PetscCall(PetscMalloc1(2*nsends,&send_waits));
  PetscCall(PetscMalloc2(size,&startv,size,&starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv,starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2*nreceives,&recv_waits));

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
  PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
  PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameters:
+  A  - a MATSCALAPACK matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
  PetscFunctionReturn(0);
}
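
/*
   Illustrative sketch (not part of the PETSc sources): block sizes must be set
   after the type is given but before the matrix is set up, since they determine
   the local storage sizes (see the PetscCheck on A->preallocated above); the
   numbers below are arbitrary.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,1000,1000));
     PetscCall(MatSetType(A,MATSCALAPACK));
     PetscCall(MatScaLAPACKSetBlockSizes(A,32,32));
     PetscCall(MatSetUp(A));
*/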
  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv,starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2*nreceives,&recv_waits));
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;
  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
  PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
  PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameters:
+  A  - a MATSCALAPACK matrix
.  mb - the row block size (or PETSC_DECIDE to use the default)
-  nb - the column block size (or PETSC_DECIDE to use the row block size)

   Note:
   The block sizes cannot be changed after MatSetUp() has been called.

   Level: intermediate

.seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix

   Not Collective

   Input Parameter:
.  A - a MATSCALAPACK matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
  PetscFunctionReturn(0);
}
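/*
   A minimal usage sketch of the block-size interface (hypothetical sizes;
   not part of the original source). Note that the block sizes must be set
   before MatSetUp(), per the PetscCheck above:

     Mat      B;
     PetscInt mb,nb;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&B));
     PetscCall(MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,100));
     PetscCall(MatSetType(B,MATSCALAPACK));
     PetscCall(MatScaLAPACKSetBlockSizes(B,32,32));
     PetscCall(MatSetUp(B));
     PetscCall(MatScaLAPACKGetBlockSizes(B,&mb,&nb));  (here mb == nb == 32)
     PetscCall(MatDestroy(&B));
*/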
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
.  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
.  -mat_scalapack_grid_height - sets the grid height (number of process rows) used for the 2D block-cyclic distribution
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.

   Level: beginner

.seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
M*/
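/*
   For example, an existing application can switch to this format and to the
   ScaLAPACK direct solver entirely from the command line (a sketch; the
   executable name and process count are placeholders):

     mpiexec -n 4 ./myapp -mat_type scalapack -mat_scalapack_block_sizes 32,32 \
                          -pc_type lu -pc_factor_mat_solver_type scalapack
*/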
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNewLog(A,&a));
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
  }
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (!flg) {
    PetscCall(PetscNewLog(A,&grid));

    PetscCallMPI(MPI_Comm_size(icomm,&size));
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); /* start from the most square grid possible */

    PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
    if (flg1) {
      PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow,&nprow));
    PetscCall(PetscBLASIntCast(grid->npcol,&npcol));
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));
  } else grid->grid_refct++;
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
  if (flg) {
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
  PetscFunctionReturn(0);
}
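/*
   A worked example of the grid-shape heuristic above: with 6 processes,
   nprow = floor(sqrt(6)) = 2 and 6 % 2 == 0, so a 2x3 grid is used; with
   7 processes, nprow = floor(sqrt(7)) = 2 but 7 % 2 != 0, so the grid
   falls back to 1x7.
*/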
/*@C
   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
   (2D block cyclic distribution).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  mb   - row block size (or PETSC_DECIDE to have it set)
.  nb   - column block size (or PETSC_DECIDE to have it set)
.  M    - number of global rows
.  N    - number of global columns
.  rsrc - coordinate of process that owns the first row of the distributed matrix
-  csrc - coordinate of process that owns the first column of the distributed matrix

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If PETSC_DECIDE is used for the block sizes, then an appropriate value is chosen.

   Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
   configured with ScaLAPACK. In particular, PETSc's local sizes lose
   significance and are thus ignored. The block sizes refer to the values
   used for the 2D block-cyclic distribution, and do not have the same meaning as in BAIJ.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  Mat_ScaLAPACK *a;
  PetscInt      m,n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetType(*A,MATSCALAPACK));
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  a = (Mat_ScaLAPACK*)(*A)->data;
  PetscCall(PetscBLASIntCast(M,&a->M));
  PetscCall(PetscBLASIntCast(N,&a->N));
  PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
  PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
  PetscCall(PetscBLASIntCast(rsrc,&a->rsrc));
  PetscCall(PetscBLASIntCast(csrc,&a->csrc));
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(0);
}
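/*
   A minimal usage sketch for MatCreateScaLAPACK() (hypothetical sizes; default
   block sizes and the (0,0) source process are assumed). Any rank may set any
   entry, since off-process values are routed through the stash machinery above:

     Mat A;

     PetscCall(MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,0,0,&A));
     PetscCall(MatSetValue(A,0,0,1.0,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));
*/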