1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 CHKERRQ(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n")); 26 CHKERRMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(0); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 39 if (iascii) { 40 CHKERRQ(PetscViewerGetFormat(viewer,&format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 CHKERRQ(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb)); 43 CHKERRQ(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol)); 44 CHKERRQ(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc)); 45 CHKERRQ(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc)); 46 PetscFunctionReturn(0); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(0); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 CHKERRQ(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense)); 53 CHKERRQ(MatView(Adense,viewer)); 54 CHKERRQ(MatDestroy(&Adense)); 55 PetscFunctionReturn(0); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 61 PetscLogDouble isend[2],irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld*a->locc; /* locally allocated */ 67 isend[1] = a->locr*a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = ((PetscObject)A)->mem; 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 92 { 93 PetscFunctionBegin; 94 switch (op) { 95 case MAT_NEW_NONZERO_LOCATIONS: 96 case MAT_NEW_NONZERO_LOCATION_ERR: 97 case MAT_NEW_NONZERO_ALLOCATION_ERR: 98 case MAT_SYMMETRIC: 99 case MAT_SORTED_FULL: 100 case MAT_HERMITIAN: 101 break; 102 default: 103 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 104 } 105 PetscFunctionReturn(0); 106 } 107 108 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 109 { 110 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 111 PetscInt i,j; 112 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 113 114 PetscFunctionBegin; 115 for (i=0;i<nr;i++) { 116 if (rows[i] < 0) continue; 117 CHKERRQ(PetscBLASIntCast(rows[i]+1,&gridx)); 118 for (j=0;j<nc;j++) { 119 if (cols[j] < 0) continue; 120 CHKERRQ(PetscBLASIntCast(cols[j]+1,&gcidx)); 121 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 122 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 123 switch (imode) { 124 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 125 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 126 default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 127 } 128 } else { 129 PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 130 A->assembled = PETSC_FALSE; 131 CHKERRQ(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES))); 132 } 133 } 134 } 135 PetscFunctionReturn(0); 136 } 137 138 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 139 { 140 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 141 PetscScalar *x2d,*y2d,alpha=1.0; 142 const PetscInt *ranges; 143 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 144 145 PetscFunctionBegin; 146 if (transpose) { 147 148 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 149 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 150 CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 151 xlld = PetscMax(1,A->rmap->n); 152 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 153 PetscCheckScaLapackInfo("descinit",info); 154 CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 155 CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* y block size */ 156 ylld = 1; 157 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 158 PetscCheckScaLapackInfo("descinit",info); 159 160 /* allocate 2d vectors */ 161 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 162 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 163 CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 164 xlld = PetscMax(1,lszx); 165 166 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 167 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 168 PetscCheckScaLapackInfo("descinit",info); 169 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 170 PetscCheckScaLapackInfo("descinit",info); 171 172 /* redistribute x as a column of a 2d matrix */ 173 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 174 175 /* redistribute y as a row of a 2d matrix */ 176 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 177 178 /* call PBLAS subroutine */ 179 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 180 181 /* redistribute y from a row of a 2d matrix */ 182 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 183 184 } else { /* non-transpose */ 185 186 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 187 CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 188 CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* x block size */ 189 xlld = 1; 190 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 191 PetscCheckScaLapackInfo("descinit",info); 192 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 193 CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* y block size */ 194 ylld = PetscMax(1,A->rmap->n); 195 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 196 PetscCheckScaLapackInfo("descinit",info); 197 198 /* allocate 2d vectors */ 199 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 200 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 201 CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 202 ylld = PetscMax(1,lszy); 203 204 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 205 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 206 PetscCheckScaLapackInfo("descinit",info); 207 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 208 PetscCheckScaLapackInfo("descinit",info); 209 210 /* redistribute x as a row of a 2d matrix */ 211 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 212 213 /* redistribute y as a column of a 2d matrix */ 214 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 215 216 /* call PBLAS subroutine */ 217 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 218 219 /* redistribute y from a column of a 2d matrix */ 220 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 221 222 } 223 CHKERRQ(PetscFree2(x2d,y2d)); 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 228 { 229 const PetscScalar *xarray; 230 PetscScalar *yarray; 231 232 PetscFunctionBegin; 233 CHKERRQ(VecGetArrayRead(x,&xarray)); 234 CHKERRQ(VecGetArray(y,&yarray)); 235 CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray)); 236 CHKERRQ(VecRestoreArrayRead(x,&xarray)); 237 CHKERRQ(VecRestoreArray(y,&yarray)); 238 PetscFunctionReturn(0); 239 } 240 241 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 242 { 243 const PetscScalar *xarray; 244 PetscScalar *yarray; 245 246 PetscFunctionBegin; 247 CHKERRQ(VecGetArrayRead(x,&xarray)); 248 CHKERRQ(VecGetArray(y,&yarray)); 249 CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray)); 250 CHKERRQ(VecRestoreArrayRead(x,&xarray)); 251 CHKERRQ(VecRestoreArray(y,&yarray)); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 256 { 257 const PetscScalar *xarray; 258 PetscScalar *zarray; 259 260 PetscFunctionBegin; 261 if (y != z) CHKERRQ(VecCopy(y,z)); 262 CHKERRQ(VecGetArrayRead(x,&xarray)); 263 CHKERRQ(VecGetArray(z,&zarray)); 264 CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray)); 265 CHKERRQ(VecRestoreArrayRead(x,&xarray)); 266 CHKERRQ(VecRestoreArray(z,&zarray)); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 271 { 272 const PetscScalar *xarray; 273 PetscScalar *zarray; 274 275 PetscFunctionBegin; 276 if (y != z) CHKERRQ(VecCopy(y,z)); 277 CHKERRQ(VecGetArrayRead(x,&xarray)); 278 CHKERRQ(VecGetArray(z,&zarray)); 279 CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray)); 280 CHKERRQ(VecRestoreArrayRead(x,&xarray)); 281 CHKERRQ(VecRestoreArray(z,&zarray)); 282 PetscFunctionReturn(0); 283 } 284 285 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 286 { 287 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 288 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 289 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 290 PetscScalar sone=1.0,zero=0.0; 291 PetscBLASInt one=1; 292 293 PetscFunctionBegin; 294 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 295 C->assembled = PETSC_TRUE; 296 PetscFunctionReturn(0); 297 } 298 299 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 300 { 301 PetscFunctionBegin; 302 CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 303 CHKERRQ(MatSetType(C,MATSCALAPACK)); 304 CHKERRQ(MatSetUp(C)); 305 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 306 PetscFunctionReturn(0); 307 } 308 309 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 310 { 311 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 312 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 313 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 314 PetscScalar sone=1.0,zero=0.0; 315 PetscBLASInt one=1; 316 317 PetscFunctionBegin; 318 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 319 C->assembled = PETSC_TRUE; 320 PetscFunctionReturn(0); 321 } 322 323 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 324 { 325 PetscFunctionBegin; 326 CHKERRQ(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE)); 327 CHKERRQ(MatSetType(C,MATSCALAPACK)); 328 CHKERRQ(MatSetUp(C)); 329 PetscFunctionReturn(0); 330 } 331 332 /* --------------------------------------- */ 333 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 334 { 335 PetscFunctionBegin; 336 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 337 C->ops->productsymbolic = MatProductSymbolic_AB; 338 PetscFunctionReturn(0); 339 } 340 341 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 342 { 343 PetscFunctionBegin; 344 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 345 C->ops->productsymbolic = MatProductSymbolic_ABt; 346 PetscFunctionReturn(0); 347 } 348 349 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 350 { 351 Mat_Product *product = C->product; 352 353 PetscFunctionBegin; 354 switch (product->type) { 355 case MATPRODUCT_AB: 356 CHKERRQ(MatProductSetFromOptions_ScaLAPACK_AB(C)); 357 break; 358 case MATPRODUCT_ABt: 359 CHKERRQ(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 360 break; 361 default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 362 } 363 PetscFunctionReturn(0); 364 } 365 /* --------------------------------------- */ 366 367 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 368 { 369 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 370 PetscScalar *darray,*d2d,v; 371 const PetscInt *ranges; 372 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 373 374 PetscFunctionBegin; 375 CHKERRQ(VecGetArray(D,&darray)); 376 377 if (A->rmap->N<=A->cmap->N) { /* row version */ 378 379 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 380 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 381 CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 382 dlld = PetscMax(1,A->rmap->n); 383 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 384 PetscCheckScaLapackInfo("descinit",info); 385 386 /* allocate 2d vector */ 387 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 388 CHKERRQ(PetscCalloc1(lszd,&d2d)); 389 dlld = PetscMax(1,lszd); 390 391 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 392 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 393 PetscCheckScaLapackInfo("descinit",info); 394 395 /* collect diagonal */ 396 for (j=1;j<=a->M;j++) { 397 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 398 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 399 } 400 401 /* redistribute d from a column of a 2d matrix */ 402 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 403 CHKERRQ(PetscFree(d2d)); 404 405 } else { /* column version */ 406 407 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 408 CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 409 CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 410 dlld = 1; 411 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 412 PetscCheckScaLapackInfo("descinit",info); 413 414 /* allocate 2d vector */ 415 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 416 CHKERRQ(PetscCalloc1(lszd,&d2d)); 417 418 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 419 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 420 PetscCheckScaLapackInfo("descinit",info); 421 422 /* collect diagonal */ 423 for (j=1;j<=a->N;j++) { 424 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 425 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 426 } 427 428 /* redistribute d from a row of a 2d matrix */ 429 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 430 CHKERRQ(PetscFree(d2d)); 431 } 432 433 CHKERRQ(VecRestoreArray(D,&darray)); 434 CHKERRQ(VecAssemblyBegin(D)); 435 CHKERRQ(VecAssemblyEnd(D)); 436 PetscFunctionReturn(0); 437 } 438 439 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 440 { 441 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 442 const PetscScalar *d; 443 const PetscInt *ranges; 444 PetscScalar *d2d; 445 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 446 447 PetscFunctionBegin; 448 if (R) { 449 CHKERRQ(VecGetArrayRead(R,(const PetscScalar **)&d)); 450 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 451 CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges)); 452 CHKERRQ(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 453 dlld = 1; 454 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 455 PetscCheckScaLapackInfo("descinit",info); 456 457 /* allocate 2d vector */ 458 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 459 CHKERRQ(PetscCalloc1(lszd,&d2d)); 460 461 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 462 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 463 PetscCheckScaLapackInfo("descinit",info); 464 465 /* redistribute d to a row of a 2d matrix */ 466 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 467 468 /* broadcast along process columns */ 469 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 470 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 471 472 /* local scaling */ 473 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 474 475 CHKERRQ(PetscFree(d2d)); 476 CHKERRQ(VecRestoreArrayRead(R,(const PetscScalar **)&d)); 477 } 478 if (L) { 479 CHKERRQ(VecGetArrayRead(L,(const PetscScalar **)&d)); 480 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 481 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 482 CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 483 dlld = PetscMax(1,A->rmap->n); 484 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 485 PetscCheckScaLapackInfo("descinit",info); 486 487 /* allocate 2d vector */ 488 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 489 CHKERRQ(PetscCalloc1(lszd,&d2d)); 490 dlld = PetscMax(1,lszd); 491 492 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 493 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 494 PetscCheckScaLapackInfo("descinit",info); 495 496 /* redistribute d to a column of a 2d matrix */ 497 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 498 499 /* broadcast along process rows */ 500 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 501 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 502 503 /* local scaling */ 504 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 505 506 CHKERRQ(PetscFree(d2d)); 507 CHKERRQ(VecRestoreArrayRead(L,(const PetscScalar **)&d)); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 513 { 514 PetscFunctionBegin; 515 *missing = PETSC_FALSE; 516 PetscFunctionReturn(0); 517 } 518 519 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 520 { 521 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 522 PetscBLASInt n,one=1; 523 524 PetscFunctionBegin; 525 n = x->lld*x->locc; 526 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 527 PetscFunctionReturn(0); 528 } 529 530 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 531 { 532 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 533 PetscBLASInt i,n; 534 PetscScalar v; 535 536 PetscFunctionBegin; 537 n = PetscMin(x->M,x->N); 538 for (i=1;i<=n;i++) { 539 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 540 v += alpha; 541 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 542 } 543 PetscFunctionReturn(0); 544 } 545 546 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 547 { 548 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 549 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 550 PetscBLASInt one=1; 551 PetscScalar beta=1.0; 552 553 PetscFunctionBegin; 554 MatScaLAPACKCheckDistribution(Y,1,X,3); 555 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 556 CHKERRQ(PetscObjectStateIncrease((PetscObject)Y)); 557 PetscFunctionReturn(0); 558 } 559 560 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 561 { 562 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 563 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 564 565 PetscFunctionBegin; 566 CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 567 CHKERRQ(PetscObjectStateIncrease((PetscObject)B)); 568 PetscFunctionReturn(0); 569 } 570 571 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 572 { 573 Mat Bs; 574 MPI_Comm comm; 575 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 576 577 PetscFunctionBegin; 578 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 579 CHKERRQ(MatCreate(comm,&Bs)); 580 CHKERRQ(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 581 CHKERRQ(MatSetType(Bs,MATSCALAPACK)); 582 b = (Mat_ScaLAPACK*)Bs->data; 583 b->M = a->M; 584 b->N = a->N; 585 b->mb = a->mb; 586 b->nb = a->nb; 587 b->rsrc = a->rsrc; 588 b->csrc = a->csrc; 589 CHKERRQ(MatSetUp(Bs)); 590 *B = Bs; 591 if (op == MAT_COPY_VALUES) { 592 CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); 593 } 594 Bs->assembled = PETSC_TRUE; 595 PetscFunctionReturn(0); 596 } 597 598 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 599 { 600 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601 Mat Bs = *B; 602 PetscBLASInt one=1; 603 PetscScalar sone=1.0,zero=0.0; 604 #if defined(PETSC_USE_COMPLEX) 605 PetscInt i,j; 606 #endif 607 608 PetscFunctionBegin; 609 if (reuse == MAT_INITIAL_MATRIX) { 610 CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 611 *B = Bs; 612 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 613 b = (Mat_ScaLAPACK*)Bs->data; 614 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 615 #if defined(PETSC_USE_COMPLEX) 616 /* undo conjugation */ 617 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 618 #endif 619 Bs->assembled = PETSC_TRUE; 620 PetscFunctionReturn(0); 621 } 622 623 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 624 { 625 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 626 PetscInt i,j; 627 628 PetscFunctionBegin; 629 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 630 PetscFunctionReturn(0); 631 } 632 633 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 634 { 635 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 636 Mat Bs = *B; 637 PetscBLASInt one=1; 638 PetscScalar sone=1.0,zero=0.0; 639 640 PetscFunctionBegin; 641 if (reuse == MAT_INITIAL_MATRIX) { 642 CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 643 *B = Bs; 644 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 645 b = (Mat_ScaLAPACK*)Bs->data; 646 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 647 Bs->assembled = PETSC_TRUE; 648 PetscFunctionReturn(0); 649 } 650 651 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 652 { 653 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 654 PetscScalar *x,*x2d; 655 const PetscInt *ranges; 656 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 657 658 PetscFunctionBegin; 659 CHKERRQ(VecCopy(B,X)); 660 CHKERRQ(VecGetArray(X,&x)); 661 662 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 663 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 664 CHKERRQ(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 665 xlld = PetscMax(1,A->rmap->n); 666 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 667 PetscCheckScaLapackInfo("descinit",info); 668 669 /* allocate 2d vector */ 670 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 671 CHKERRQ(PetscMalloc1(lszx,&x2d)); 672 xlld = PetscMax(1,lszx); 673 674 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 675 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 676 PetscCheckScaLapackInfo("descinit",info); 677 678 /* redistribute x as a column of a 2d matrix */ 679 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 680 681 /* call ScaLAPACK subroutine */ 682 switch (A->factortype) { 683 case MAT_FACTOR_LU: 684 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 685 PetscCheckScaLapackInfo("getrs",info); 686 break; 687 case MAT_FACTOR_CHOLESKY: 688 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 689 PetscCheckScaLapackInfo("potrs",info); 690 break; 691 default: 692 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 693 } 694 695 /* redistribute x from a column of a 2d matrix */ 696 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 697 698 CHKERRQ(PetscFree(x2d)); 699 CHKERRQ(VecRestoreArray(X,&x)); 700 PetscFunctionReturn(0); 701 } 702 703 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 704 { 705 PetscFunctionBegin; 706 CHKERRQ(MatSolve_ScaLAPACK(A,B,X)); 707 CHKERRQ(VecAXPY(X,1,Y)); 708 PetscFunctionReturn(0); 709 } 710 711 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 712 { 713 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 714 PetscBool flg1,flg2; 715 PetscBLASInt one=1,info; 716 717 PetscFunctionBegin; 718 CHKERRQ(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1)); 719 CHKERRQ(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2)); 720 PetscCheckFalse(!(flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 721 MatScaLAPACKCheckDistribution(B,1,X,2); 722 b = (Mat_ScaLAPACK*)B->data; 723 x = (Mat_ScaLAPACK*)X->data; 724 CHKERRQ(PetscArraycpy(x->loc,b->loc,b->lld*b->locc)); 725 726 switch (A->factortype) { 727 case MAT_FACTOR_LU: 728 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 729 PetscCheckScaLapackInfo("getrs",info); 730 break; 731 case MAT_FACTOR_CHOLESKY: 732 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 733 PetscCheckScaLapackInfo("potrs",info); 734 break; 735 default: 736 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 737 } 738 PetscFunctionReturn(0); 739 } 740 741 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 742 { 743 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 744 PetscBLASInt one=1,info; 745 746 PetscFunctionBegin; 747 if (!a->pivots) { 748 CHKERRQ(PetscMalloc1(a->locr+a->mb,&a->pivots)); 749 CHKERRQ(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt))); 750 } 751 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 752 PetscCheckScaLapackInfo("getrf",info); 753 A->factortype = MAT_FACTOR_LU; 754 A->assembled = PETSC_TRUE; 755 756 CHKERRQ(PetscFree(A->solvertype)); 757 CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 758 PetscFunctionReturn(0); 759 } 760 761 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 762 { 763 PetscFunctionBegin; 764 CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN)); 765 CHKERRQ(MatLUFactor_ScaLAPACK(F,0,0,info)); 766 PetscFunctionReturn(0); 767 } 768 769 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 770 { 771 PetscFunctionBegin; 772 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 773 PetscFunctionReturn(0); 774 } 775 776 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 777 { 778 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 779 PetscBLASInt one=1,info; 780 781 PetscFunctionBegin; 782 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 783 PetscCheckScaLapackInfo("potrf",info); 784 A->factortype = MAT_FACTOR_CHOLESKY; 785 A->assembled = PETSC_TRUE; 786 787 CHKERRQ(PetscFree(A->solvertype)); 788 CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 789 PetscFunctionReturn(0); 790 } 791 792 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 793 { 794 PetscFunctionBegin; 795 CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN)); 796 CHKERRQ(MatCholeskyFactor_ScaLAPACK(F,0,info)); 797 PetscFunctionReturn(0); 798 } 799 800 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 801 { 802 PetscFunctionBegin; 803 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 804 PetscFunctionReturn(0); 805 } 806 807 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 808 { 809 PetscFunctionBegin; 810 *type = MATSOLVERSCALAPACK; 811 PetscFunctionReturn(0); 812 } 813 814 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 815 { 816 Mat B; 817 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 818 819 PetscFunctionBegin; 820 /* Create the factorization matrix */ 821 CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B)); 822 B->trivialsymbolic = PETSC_TRUE; 823 B->factortype = ftype; 824 CHKERRQ(PetscFree(B->solvertype)); 825 CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype)); 826 827 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack)); 828 *F = B; 829 PetscFunctionReturn(0); 830 } 831 832 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 833 { 834 PetscFunctionBegin; 835 CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack)); 836 CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack)); 837 PetscFunctionReturn(0); 838 } 839 840 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 841 { 842 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 843 PetscBLASInt one=1,lwork=0; 844 const char *ntype; 845 PetscScalar *work=NULL,dummy; 846 847 PetscFunctionBegin; 848 switch (type) { 849 case NORM_1: 850 ntype = "1"; 851 lwork = PetscMax(a->locr,a->locc); 852 break; 853 case NORM_FROBENIUS: 854 ntype = "F"; 855 work = &dummy; 856 break; 857 case NORM_INFINITY: 858 ntype = "I"; 859 lwork = PetscMax(a->locr,a->locc); 860 break; 861 default: 862 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 863 } 864 if (lwork) CHKERRQ(PetscMalloc1(lwork,&work)); 865 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 866 if (lwork) CHKERRQ(PetscFree(work)); 867 PetscFunctionReturn(0); 868 } 869 870 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 871 { 872 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 873 874 PetscFunctionBegin; 875 CHKERRQ(PetscArrayzero(a->loc,a->lld*a->locc)); 876 PetscFunctionReturn(0); 877 } 878 879 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 880 { 881 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 882 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 883 884 PetscFunctionBegin; 885 if (rows) { 886 n = a->locr; 887 nb = a->mb; 888 isrc = a->rsrc; 889 nproc = a->grid->nprow; 890 iproc = a->grid->myrow; 891 CHKERRQ(PetscMalloc1(n,&idx)); 892 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 893 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows)); 894 } 895 if (cols) { 896 n = a->locc; 897 nb = a->nb; 898 isrc = a->csrc; 899 nproc = a->grid->npcol; 900 iproc = a->grid->mycol; 901 CHKERRQ(PetscMalloc1(n,&idx)); 902 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 903 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols)); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 909 { 910 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 911 Mat Bmpi; 912 MPI_Comm comm; 913 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 914 const PetscInt *ranges,*branges,*cwork; 915 const PetscScalar *vwork; 916 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 917 PetscScalar *barray; 918 PetscBool differ=PETSC_FALSE; 919 PetscMPIInt size; 920 921 PetscFunctionBegin; 922 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 923 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 924 925 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 926 CHKERRMPI(MPI_Comm_size(comm,&size)); 927 CHKERRQ(PetscLayoutGetRanges((*B)->rmap,&branges)); 928 for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 929 } 930 931 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 932 CHKERRQ(MatCreate(comm,&Bmpi)); 933 m = PETSC_DECIDE; 934 CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 935 n = PETSC_DECIDE; 936 CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 937 CHKERRQ(MatSetSizes(Bmpi,m,n,M,N)); 938 CHKERRQ(MatSetType(Bmpi,MATDENSE)); 939 CHKERRQ(MatSetUp(Bmpi)); 940 941 /* create ScaLAPACK descriptor for B (1d block distribution) */ 942 CHKERRQ(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 943 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 944 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 945 PetscCheckScaLapackInfo("descinit",info); 946 947 /* redistribute matrix */ 948 CHKERRQ(MatDenseGetArray(Bmpi,&barray)); 949 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 950 CHKERRQ(MatDenseRestoreArray(Bmpi,&barray)); 951 CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 952 CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 953 954 /* transfer rows of auxiliary matrix to the final matrix B */ 955 CHKERRQ(MatGetOwnershipRange(Bmpi,&rstart,&rend)); 956 for (i=rstart;i<rend;i++) { 957 CHKERRQ(MatGetRow(Bmpi,i,&nz,&cwork,&vwork)); 958 CHKERRQ(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES)); 959 CHKERRQ(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork)); 960 } 961 CHKERRQ(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY)); 962 CHKERRQ(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY)); 963 CHKERRQ(MatDestroy(&Bmpi)); 964 965 } else { /* normal cases */ 966 967 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 968 else { 969 CHKERRQ(MatCreate(comm,&Bmpi)); 970 m = PETSC_DECIDE; 971 CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 972 n = PETSC_DECIDE; 973 CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 974 CHKERRQ(MatSetSizes(Bmpi,m,n,M,N)); 975 CHKERRQ(MatSetType(Bmpi,MATDENSE)); 976 CHKERRQ(MatSetUp(Bmpi)); 977 } 978 979 /* create ScaLAPACK descriptor for B (1d block distribution) */ 980 CHKERRQ(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 981 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 982 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 983 PetscCheckScaLapackInfo("descinit",info); 984 985 /* redistribute matrix */ 986 CHKERRQ(MatDenseGetArray(Bmpi,&barray)); 987 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 988 CHKERRQ(MatDenseRestoreArray(Bmpi,&barray)); 989 990 CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 991 CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 992 if (reuse == MAT_INPLACE_MATRIX) { 993 CHKERRQ(MatHeaderReplace(A,&Bmpi)); 994 } else *B = Bmpi; 995 } 996 PetscFunctionReturn(0); 997 } 998 999 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1000 { 1001 Mat_ScaLAPACK *b; 1002 Mat Bmpi; 1003 MPI_Comm comm; 1004 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1005 const PetscInt *ranges; 1006 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1007 PetscScalar *aarray; 1008 PetscInt lda; 1009 1010 PetscFunctionBegin; 1011 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 1012 1013 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1014 else { 1015 CHKERRQ(MatCreate(comm,&Bmpi)); 1016 m = PETSC_DECIDE; 1017 CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 1018 n = PETSC_DECIDE; 1019 CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 1020 CHKERRQ(MatSetSizes(Bmpi,m,n,M,N)); 1021 CHKERRQ(MatSetType(Bmpi,MATSCALAPACK)); 1022 CHKERRQ(MatSetUp(Bmpi)); 1023 } 1024 b = (Mat_ScaLAPACK*)Bmpi->data; 1025 1026 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1027 CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges)); 1028 CHKERRQ(PetscBLASIntCast(ranges[1],&amb)); /* row block size */ 1029 CHKERRQ(MatDenseGetLDA(A,&lda)); 1030 lld = PetscMax(lda,1); /* local leading dimension */ 1031 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1032 PetscCheckScaLapackInfo("descinit",info); 1033 1034 /* redistribute matrix */ 1035 CHKERRQ(MatDenseGetArray(A,&aarray)); 1036 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1037 CHKERRQ(MatDenseRestoreArray(A,&aarray)); 1038 1039 CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 1040 CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1041 if (reuse == MAT_INPLACE_MATRIX) { 1042 CHKERRQ(MatHeaderReplace(A,&Bmpi)); 1043 } else *B = Bmpi; 1044 PetscFunctionReturn(0); 1045 } 1046 1047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1048 { 1049 Mat mat_scal; 1050 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1051 const PetscInt *cols; 1052 const PetscScalar *vals; 1053 1054 PetscFunctionBegin; 1055 if (reuse == MAT_REUSE_MATRIX) { 1056 mat_scal = *newmat; 1057 CHKERRQ(MatZeroEntries(mat_scal)); 1058 } else { 1059 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1060 m = PETSC_DECIDE; 1061 CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1062 n = PETSC_DECIDE; 1063 CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 1064 CHKERRQ(MatSetSizes(mat_scal,m,n,M,N)); 1065 CHKERRQ(MatSetType(mat_scal,MATSCALAPACK)); 1066 CHKERRQ(MatSetUp(mat_scal)); 1067 } 1068 for (row=rstart;row<rend;row++) { 1069 CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals)); 1070 CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES)); 1071 CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1072 } 1073 CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 1074 CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1075 1076 if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal)); 1077 else *newmat = mat_scal; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1082 { 1083 Mat mat_scal; 1084 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1085 const PetscInt *cols; 1086 const PetscScalar *vals; 1087 PetscScalar v; 1088 1089 PetscFunctionBegin; 1090 if (reuse == MAT_REUSE_MATRIX) { 1091 mat_scal = *newmat; 1092 CHKERRQ(MatZeroEntries(mat_scal)); 1093 } else { 1094 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1095 m = PETSC_DECIDE; 1096 CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1097 n = PETSC_DECIDE; 1098 CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 1099 CHKERRQ(MatSetSizes(mat_scal,m,n,M,N)); 1100 CHKERRQ(MatSetType(mat_scal,MATSCALAPACK)); 1101 CHKERRQ(MatSetUp(mat_scal)); 1102 } 1103 CHKERRQ(MatGetRowUpperTriangular(A)); 1104 for (row=rstart;row<rend;row++) { 1105 CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals)); 1106 CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES)); 1107 for (j=0;j<ncols;j++) { /* lower triangular part */ 1108 if (cols[j] == row) continue; 1109 v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1110 CHKERRQ(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES)); 1111 } 1112 CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1113 } 1114 CHKERRQ(MatRestoreRowUpperTriangular(A)); 1115 CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 1116 CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1117 1118 if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal)); 1119 else *newmat = mat_scal; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1124 { 1125 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1126 PetscInt sz=0; 1127 1128 PetscFunctionBegin; 1129 CHKERRQ(PetscLayoutSetUp(A->rmap)); 1130 CHKERRQ(PetscLayoutSetUp(A->cmap)); 1131 if (!a->lld) a->lld = a->locr; 1132 1133 CHKERRQ(PetscFree(a->loc)); 1134 CHKERRQ(PetscIntMultError(a->lld,a->locc,&sz)); 1135 CHKERRQ(PetscCalloc1(sz,&a->loc)); 1136 CHKERRQ(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar))); 1137 1138 A->preallocated = PETSC_TRUE; 1139 PetscFunctionReturn(0); 1140 } 1141 1142 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1143 { 1144 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1145 Mat_ScaLAPACK_Grid *grid; 1146 PetscBool flg; 1147 MPI_Comm icomm; 1148 1149 PetscFunctionBegin; 1150 CHKERRQ(MatStashDestroy_Private(&A->stash)); 1151 CHKERRQ(PetscFree(a->loc)); 1152 CHKERRQ(PetscFree(a->pivots)); 1153 CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 1154 CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1155 if (--grid->grid_refct == 0) { 1156 Cblacs_gridexit(grid->ictxt); 1157 Cblacs_gridexit(grid->ictxrow); 1158 Cblacs_gridexit(grid->ictxcol); 1159 CHKERRQ(PetscFree(grid)); 1160 CHKERRMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval)); 1161 } 1162 CHKERRQ(PetscCommDestroy(&icomm)); 1163 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL)); 1164 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1165 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL)); 1166 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL)); 1167 CHKERRQ(PetscFree(A->data)); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1172 { 1173 const PetscInt *ranges; 1174 PetscMPIInt size; 1175 PetscInt i,n; 1176 1177 PetscFunctionBegin; 1178 CHKERRMPI(MPI_Comm_size(map->comm,&size)); 1179 if (size>2) { 1180 CHKERRQ(PetscLayoutGetRanges(map,&ranges)); 1181 n = ranges[1]-ranges[0]; 1182 for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1183 PetscCheckFalse(i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1189 { 1190 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1191 PetscBLASInt info=0; 1192 1193 PetscFunctionBegin; 1194 CHKERRQ(PetscLayoutSetUp(A->rmap)); 1195 CHKERRQ(PetscLayoutSetUp(A->cmap)); 1196 1197 /* check that the layout is as enforced by MatCreateScaLAPACK */ 1198 CHKERRQ(MatScaLAPACKCheckLayout(A->rmap)); 1199 CHKERRQ(MatScaLAPACKCheckLayout(A->cmap)); 1200 1201 /* compute local sizes */ 1202 CHKERRQ(PetscBLASIntCast(A->rmap->N,&a->M)); 1203 CHKERRQ(PetscBLASIntCast(A->cmap->N,&a->N)); 1204 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1205 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1206 a->lld = PetscMax(1,a->locr); 1207 1208 /* allocate local array */ 1209 CHKERRQ(MatScaLAPACKSetPreallocation(A)); 1210 1211 /* set up ScaLAPACK descriptor */ 1212 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1213 PetscCheckScaLapackInfo("descinit",info); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1218 { 1219 PetscInt nstash,reallocs; 1220 1221 PetscFunctionBegin; 1222 if (A->nooffprocentries) PetscFunctionReturn(0); 1223 CHKERRQ(MatStashScatterBegin_Private(A,&A->stash,NULL)); 1224 CHKERRQ(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs)); 1225 CHKERRQ(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1230 { 1231 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1232 PetscMPIInt n; 1233 PetscInt i,flg,*row,*col; 1234 PetscScalar *val; 1235 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1236 1237 PetscFunctionBegin; 1238 if (A->nooffprocentries) PetscFunctionReturn(0); 1239 while (1) { 1240 CHKERRQ(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg)); 1241 if (!flg) break; 1242 for (i=0;i<n;i++) { 1243 CHKERRQ(PetscBLASIntCast(row[i]+1,&gridx)); 1244 CHKERRQ(PetscBLASIntCast(col[i]+1,&gcidx)); 1245 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1246 PetscCheckFalse(rsrc!=a->grid->myrow || csrc!=a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1247 switch (A->insertmode) { 1248 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1249 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1250 default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1251 } 1252 } 1253 } 1254 CHKERRQ(MatStashScatterEnd_Private(&A->stash)); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1259 { 1260 Mat Adense,As; 1261 MPI_Comm comm; 1262 1263 PetscFunctionBegin; 1264 CHKERRQ(PetscObjectGetComm((PetscObject)newMat,&comm)); 1265 CHKERRQ(MatCreate(comm,&Adense)); 1266 CHKERRQ(MatSetType(Adense,MATDENSE)); 1267 CHKERRQ(MatLoad(Adense,viewer)); 1268 CHKERRQ(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As)); 1269 CHKERRQ(MatDestroy(&Adense)); 1270 CHKERRQ(MatHeaderReplace(newMat,&As)); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 /* -------------------------------------------------------------------*/ 1275 static struct _MatOps MatOps_Values = { 1276 MatSetValues_ScaLAPACK, 1277 0, 1278 0, 1279 MatMult_ScaLAPACK, 1280 /* 4*/ MatMultAdd_ScaLAPACK, 1281 MatMultTranspose_ScaLAPACK, 1282 MatMultTransposeAdd_ScaLAPACK, 1283 MatSolve_ScaLAPACK, 1284 MatSolveAdd_ScaLAPACK, 1285 0, 1286 /*10*/ 0, 1287 MatLUFactor_ScaLAPACK, 1288 MatCholeskyFactor_ScaLAPACK, 1289 0, 1290 MatTranspose_ScaLAPACK, 1291 /*15*/ MatGetInfo_ScaLAPACK, 1292 0, 1293 MatGetDiagonal_ScaLAPACK, 1294 MatDiagonalScale_ScaLAPACK, 1295 MatNorm_ScaLAPACK, 1296 /*20*/ MatAssemblyBegin_ScaLAPACK, 1297 MatAssemblyEnd_ScaLAPACK, 1298 MatSetOption_ScaLAPACK, 1299 MatZeroEntries_ScaLAPACK, 1300 /*24*/ 0, 1301 MatLUFactorSymbolic_ScaLAPACK, 1302 MatLUFactorNumeric_ScaLAPACK, 1303 MatCholeskyFactorSymbolic_ScaLAPACK, 1304 MatCholeskyFactorNumeric_ScaLAPACK, 1305 /*29*/ MatSetUp_ScaLAPACK, 1306 0, 1307 0, 1308 0, 1309 0, 1310 /*34*/ MatDuplicate_ScaLAPACK, 1311 0, 1312 0, 1313 0, 1314 0, 1315 /*39*/ MatAXPY_ScaLAPACK, 1316 0, 1317 0, 1318 0, 1319 MatCopy_ScaLAPACK, 1320 /*44*/ 0, 1321 MatScale_ScaLAPACK, 1322 MatShift_ScaLAPACK, 1323 0, 1324 0, 1325 /*49*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*54*/ 0, 1331 0, 1332 0, 1333 0, 1334 0, 1335 /*59*/ 0, 1336 MatDestroy_ScaLAPACK, 1337 MatView_ScaLAPACK, 1338 0, 1339 0, 1340 /*64*/ 0, 1341 0, 1342 0, 1343 0, 1344 0, 1345 /*69*/ 0, 1346 0, 1347 MatConvert_ScaLAPACK_Dense, 1348 0, 1349 0, 1350 /*74*/ 0, 1351 0, 1352 0, 1353 0, 1354 0, 1355 /*79*/ 0, 1356 0, 1357 0, 1358 0, 1359 MatLoad_ScaLAPACK, 1360 /*84*/ 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 /*89*/ 0, 1366 0, 1367 MatMatMultNumeric_ScaLAPACK, 1368 0, 1369 0, 1370 /*94*/ 0, 1371 0, 1372 0, 1373 MatMatTransposeMultNumeric_ScaLAPACK, 1374 0, 1375 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1376 0, 1377 0, 1378 MatConjugate_ScaLAPACK, 1379 0, 1380 /*104*/0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 /*109*/MatMatSolve_ScaLAPACK, 1386 0, 1387 0, 1388 0, 1389 MatMissingDiagonal_ScaLAPACK, 1390 /*114*/0, 1391 0, 1392 0, 1393 0, 1394 0, 1395 /*119*/0, 1396 MatHermitianTranspose_ScaLAPACK, 1397 0, 1398 0, 1399 0, 1400 /*124*/0, 1401 0, 1402 0, 1403 0, 1404 0, 1405 /*129*/0, 1406 0, 1407 0, 1408 0, 1409 0, 1410 /*134*/0, 1411 0, 1412 0, 1413 0, 1414 0, 1415 0, 1416 /*140*/0, 1417 0, 1418 0, 1419 0, 1420 0, 1421 /*145*/0, 1422 0, 1423 0 1424 }; 1425 1426 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1427 { 1428 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1429 PetscInt size=stash->size,nsends; 1430 PetscInt count,*sindices,**rindices,i,j,l; 1431 PetscScalar **rvalues,*svalues; 1432 MPI_Comm comm = stash->comm; 1433 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1434 PetscMPIInt *sizes,*nlengths,nreceives; 1435 PetscInt *sp_idx,*sp_idy; 1436 PetscScalar *sp_val; 1437 PetscMatStashSpace space,space_next; 1438 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1439 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1440 1441 PetscFunctionBegin; 1442 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1443 InsertMode addv; 1444 CHKERRMPI(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 1445 PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1446 mat->insertmode = addv; /* in case this processor had no cache */ 1447 } 1448 1449 bs2 = stash->bs*stash->bs; 1450 1451 /* first count number of contributors to each processor */ 1452 CHKERRQ(PetscCalloc1(size,&nlengths)); 1453 CHKERRQ(PetscMalloc1(stash->n+1,&owner)); 1454 1455 i = j = 0; 1456 space = stash->space_head; 1457 while (space) { 1458 space_next = space->next; 1459 for (l=0; l<space->local_used; l++) { 1460 CHKERRQ(PetscBLASIntCast(space->idx[l]+1,&gridx)); 1461 CHKERRQ(PetscBLASIntCast(space->idy[l]+1,&gcidx)); 1462 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1463 j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1464 nlengths[j]++; owner[i] = j; 1465 i++; 1466 } 1467 space = space_next; 1468 } 1469 1470 /* Now check what procs get messages - and compute nsends. */ 1471 CHKERRQ(PetscCalloc1(size,&sizes)); 1472 for (i=0, nsends=0; i<size; i++) { 1473 if (nlengths[i]) { 1474 sizes[i] = 1; nsends++; 1475 } 1476 } 1477 1478 {PetscMPIInt *onodes,*olengths; 1479 /* Determine the number of messages to expect, their lengths, from from-ids */ 1480 CHKERRQ(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives)); 1481 CHKERRQ(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths)); 1482 /* since clubbing row,col - lengths are multiplied by 2 */ 1483 for (i=0; i<nreceives; i++) olengths[i] *=2; 1484 CHKERRQ(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1)); 1485 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1486 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1487 CHKERRQ(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2)); 1488 CHKERRQ(PetscFree(onodes)); 1489 CHKERRQ(PetscFree(olengths));} 1490 1491 /* do sends: 1492 1) starts[i] gives the starting index in svalues for stuff going to 1493 the ith processor 1494 */ 1495 CHKERRQ(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices)); 1496 CHKERRQ(PetscMalloc1(2*nsends,&send_waits)); 1497 CHKERRQ(PetscMalloc2(size,&startv,size,&starti)); 1498 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1499 startv[0] = 0; starti[0] = 0; 1500 for (i=1; i<size; i++) { 1501 startv[i] = startv[i-1] + nlengths[i-1]; 1502 starti[i] = starti[i-1] + 2*nlengths[i-1]; 1503 } 1504 1505 i = 0; 1506 space = stash->space_head; 1507 while (space) { 1508 space_next = space->next; 1509 sp_idx = space->idx; 1510 sp_idy = space->idy; 1511 sp_val = space->val; 1512 for (l=0; l<space->local_used; l++) { 1513 j = owner[i]; 1514 if (bs2 == 1) { 1515 svalues[startv[j]] = sp_val[l]; 1516 } else { 1517 PetscInt k; 1518 PetscScalar *buf1,*buf2; 1519 buf1 = svalues+bs2*startv[j]; 1520 buf2 = space->val + bs2*l; 1521 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1522 } 1523 sindices[starti[j]] = sp_idx[l]; 1524 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1525 startv[j]++; 1526 starti[j]++; 1527 i++; 1528 } 1529 space = space_next; 1530 } 1531 startv[0] = 0; 1532 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1533 1534 for (i=0,count=0; i<size; i++) { 1535 if (sizes[i]) { 1536 CHKERRMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++)); 1537 CHKERRMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++)); 1538 } 1539 } 1540 #if defined(PETSC_USE_INFO) 1541 CHKERRQ(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends)); 1542 for (i=0; i<size; i++) { 1543 if (sizes[i]) { 1544 CHKERRQ(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))))); 1545 } 1546 } 1547 #endif 1548 CHKERRQ(PetscFree(nlengths)); 1549 CHKERRQ(PetscFree(owner)); 1550 CHKERRQ(PetscFree2(startv,starti)); 1551 CHKERRQ(PetscFree(sizes)); 1552 1553 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1554 CHKERRQ(PetscMalloc1(2*nreceives,&recv_waits)); 1555 1556 for (i=0; i<nreceives; i++) { 1557 recv_waits[2*i] = recv_waits1[i]; 1558 recv_waits[2*i+1] = recv_waits2[i]; 1559 } 1560 stash->recv_waits = recv_waits; 1561 1562 CHKERRQ(PetscFree(recv_waits1)); 1563 CHKERRQ(PetscFree(recv_waits2)); 1564 1565 stash->svalues = svalues; 1566 stash->sindices = sindices; 1567 stash->rvalues = rvalues; 1568 stash->rindices = rindices; 1569 stash->send_waits = send_waits; 1570 stash->nsends = nsends; 1571 stash->nrecvs = nreceives; 1572 stash->reproduce_count = 0; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1577 { 1578 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1579 1580 PetscFunctionBegin; 1581 PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1582 PetscCheckFalse(mb<1 && mb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb); 1583 PetscCheckFalse(nb<1 && nb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb); 1584 CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 1585 CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 /*@ 1590 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1591 the ScaLAPACK matrix 1592 1593 Logically Collective on A 1594 1595 Input Parameters: 1596 + A - a MATSCALAPACK matrix 1597 . mb - the row block size 1598 - nb - the column block size 1599 1600 Level: intermediate 1601 1602 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1603 @*/ 1604 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1605 { 1606 PetscFunctionBegin; 1607 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1608 PetscValidLogicalCollectiveInt(A,mb,2); 1609 PetscValidLogicalCollectiveInt(A,nb,3); 1610 CHKERRQ(PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb))); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1615 { 1616 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1617 1618 PetscFunctionBegin; 1619 if (mb) *mb = a->mb; 1620 if (nb) *nb = a->nb; 1621 PetscFunctionReturn(0); 1622 } 1623 1624 /*@ 1625 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1626 the ScaLAPACK matrix 1627 1628 Not collective 1629 1630 Input Parameter: 1631 . A - a MATSCALAPACK matrix 1632 1633 Output Parameters: 1634 + mb - the row block size 1635 - nb - the column block size 1636 1637 Level: intermediate 1638 1639 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1640 @*/ 1641 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1642 { 1643 PetscFunctionBegin; 1644 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1645 CHKERRQ(PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb))); 1646 PetscFunctionReturn(0); 1647 } 1648 1649 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1650 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1651 1652 /*MC 1653 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1654 1655 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1656 1657 Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1658 1659 Options Database Keys: 1660 + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1661 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1662 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1663 1664 Level: beginner 1665 1666 .seealso: MATDENSE, MATELEMENTAL 1667 M*/ 1668 1669 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1670 { 1671 Mat_ScaLAPACK *a; 1672 PetscErrorCode ierr; 1673 PetscBool flg,flg1; 1674 Mat_ScaLAPACK_Grid *grid; 1675 MPI_Comm icomm; 1676 PetscBLASInt nprow,npcol,myrow,mycol; 1677 PetscInt optv1,k=2,array[2]={0,0}; 1678 PetscMPIInt size; 1679 1680 PetscFunctionBegin; 1681 CHKERRQ(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps))); 1682 A->insertmode = NOT_SET_VALUES; 1683 1684 CHKERRQ(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash)); 1685 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1686 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1687 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1688 A->stash.ScatterDestroy = NULL; 1689 1690 CHKERRQ(PetscNewLog(A,&a)); 1691 A->data = (void*)a; 1692 1693 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1694 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1695 CHKERRMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0)); 1696 CHKERRQ(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1697 CHKERRQ(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite)); 1698 } 1699 CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 1700 CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1701 if (!flg) { 1702 CHKERRQ(PetscNewLog(A,&grid)); 1703 1704 CHKERRMPI(MPI_Comm_size(icomm,&size)); 1705 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1706 1707 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1708 CHKERRQ(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1)); 1709 if (flg1) { 1710 PetscCheckFalse(size % optv1,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size); 1711 grid->nprow = optv1; 1712 } 1713 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1714 1715 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1716 grid->npcol = size/grid->nprow; 1717 CHKERRQ(PetscBLASIntCast(grid->nprow,&nprow)); 1718 CHKERRQ(PetscBLASIntCast(grid->npcol,&npcol)); 1719 grid->ictxt = Csys2blacs_handle(icomm); 1720 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1721 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1722 grid->grid_refct = 1; 1723 grid->nprow = nprow; 1724 grid->npcol = npcol; 1725 grid->myrow = myrow; 1726 grid->mycol = mycol; 1727 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1728 grid->ictxrow = Csys2blacs_handle(icomm); 1729 Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1730 grid->ictxcol = Csys2blacs_handle(icomm); 1731 Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1732 CHKERRMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid)); 1733 1734 } else grid->grid_refct++; 1735 CHKERRQ(PetscCommDestroy(&icomm)); 1736 a->grid = grid; 1737 a->mb = DEFAULT_BLOCKSIZE; 1738 a->nb = DEFAULT_BLOCKSIZE; 1739 1740 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1741 CHKERRQ(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg)); 1742 if (flg) { 1743 a->mb = array[0]; 1744 a->nb = (k>1)? array[1]: a->mb; 1745 } 1746 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1747 1748 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK)); 1749 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1750 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1751 CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK)); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 /*@C 1756 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1757 (2D block cyclic distribution). 1758 1759 Collective 1760 1761 Input Parameters: 1762 + comm - MPI communicator 1763 . mb - row block size (or PETSC_DECIDE to have it set) 1764 . nb - column block size (or PETSC_DECIDE to have it set) 1765 . M - number of global rows 1766 . N - number of global columns 1767 . rsrc - coordinate of process that owns the first row of the distributed matrix 1768 - csrc - coordinate of process that owns the first column of the distributed matrix 1769 1770 Output Parameter: 1771 . A - the matrix 1772 1773 Options Database Keys: 1774 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1775 1776 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1777 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1778 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1779 1780 Notes: 1781 If PETSC_DECIDE is used for the block sizes, then an appropriate value 1782 is chosen. 1783 1784 Storage Information: 1785 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1786 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1787 significance and are thus ignored. The block sizes refer to the values 1788 used for the distributed matrix, not the same meaning as in BAIJ. 1789 1790 Level: intermediate 1791 1792 .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1793 @*/ 1794 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1795 { 1796 Mat_ScaLAPACK *a; 1797 PetscInt m,n; 1798 1799 PetscFunctionBegin; 1800 CHKERRQ(MatCreate(comm,A)); 1801 CHKERRQ(MatSetType(*A,MATSCALAPACK)); 1802 PetscCheckFalse(M==PETSC_DECIDE || N==PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1803 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1804 m = PETSC_DECIDE; 1805 CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M)); 1806 n = PETSC_DECIDE; 1807 CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N)); 1808 CHKERRQ(MatSetSizes(*A,m,n,M,N)); 1809 a = (Mat_ScaLAPACK*)(*A)->data; 1810 CHKERRQ(PetscBLASIntCast(M,&a->M)); 1811 CHKERRQ(PetscBLASIntCast(N,&a->N)); 1812 CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 1813 CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1814 CHKERRQ(PetscBLASIntCast(rsrc,&a->rsrc)); 1815 CHKERRQ(PetscBLASIntCast(csrc,&a->csrc)); 1816 CHKERRQ(MatSetUp(*A)); 1817 PetscFunctionReturn(0); 1818 } 1819