1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 #define DEFAULT_BLOCKSIZE 64 4 5 /* 6 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 7 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 8 */ 9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 10 11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 12 { 13 PetscErrorCode ierr; 14 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 15 PetscBool iascii; 16 PetscViewerFormat format; 17 Mat Adense; 18 19 PetscFunctionBegin; 20 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 21 if (iascii) { 22 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 23 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 24 ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 25 ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 26 ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 27 ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 28 PetscFunctionReturn(0); 29 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 30 PetscFunctionReturn(0); 31 } 32 } 33 /* convert to dense format and call MatView() */ 34 ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 35 ierr = MatView(Adense,viewer);CHKERRQ(ierr); 36 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 37 PetscFunctionReturn(0); 38 } 39 40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 41 { 42 PetscErrorCode ierr; 43 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 44 PetscLogDouble isend[2],irecv[2]; 45 46 PetscFunctionBegin; 47 info->block_size = 1.0; 48 49 isend[0] = a->lld*a->locc; /* locally allocated */ 50 isend[1] = a->locr*a->locc; /* used submatrix */ 51 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 52 info->nz_allocated = isend[0]; 53 info->nz_used = isend[1]; 54 } else if (flag == MAT_GLOBAL_MAX) { 55 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 56 info->nz_allocated = irecv[0]; 57 info->nz_used = irecv[1]; 58 } else if (flag == MAT_GLOBAL_SUM) { 59 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 60 info->nz_allocated = irecv[0]; 61 info->nz_used = irecv[1]; 62 } 63 64 info->nz_unneeded = 0; 65 info->assemblies = A->num_ass; 66 info->mallocs = 0; 67 info->memory = ((PetscObject)A)->mem; 68 info->fill_ratio_given = 0; 69 info->fill_ratio_needed = 0; 70 info->factor_mallocs = 0; 71 PetscFunctionReturn(0); 72 } 73 74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 75 { 76 PetscFunctionBegin; 77 switch (op) { 78 case MAT_NEW_NONZERO_LOCATIONS: 79 case MAT_NEW_NONZERO_LOCATION_ERR: 80 case MAT_NEW_NONZERO_ALLOCATION_ERR: 81 case MAT_SYMMETRIC: 82 case MAT_SORTED_FULL: 83 case MAT_HERMITIAN: 84 break; 85 default: 86 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 87 } 88 PetscFunctionReturn(0); 89 } 90 91 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 94 PetscErrorCode ierr; 95 PetscInt i,j; 96 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 97 98 PetscFunctionBegin; 99 for (i=0;i<nr;i++) { 100 if (rows[i] < 0) continue; 101 ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 102 for (j=0;j<nc;j++) { 103 if (cols[j] < 0) continue; 104 ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 105 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 106 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 107 switch (imode) { 108 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 109 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 110 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 111 } 112 } else { 113 if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 114 A->assembled = PETSC_FALSE; 115 ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 116 } 117 } 118 } 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 123 { 124 PetscErrorCode ierr; 125 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 126 PetscScalar *x2d,*y2d,alpha=1.0; 127 const PetscInt *ranges; 128 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 129 130 PetscFunctionBegin; 131 if (transpose) { 132 133 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 134 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 135 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 136 xlld = PetscMax(1,A->rmap->n); 137 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 138 PetscCheckScaLapackInfo("descinit",info); 139 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 140 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 141 ylld = 1; 142 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 143 PetscCheckScaLapackInfo("descinit",info); 144 145 /* allocate 2d vectors */ 146 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 147 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 148 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 149 xlld = PetscMax(1,lszx); 150 151 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 152 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 153 PetscCheckScaLapackInfo("descinit",info); 154 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 155 PetscCheckScaLapackInfo("descinit",info); 156 157 /* redistribute x as a column of a 2d matrix */ 158 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 159 160 /* redistribute y as a row of a 2d matrix */ 161 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 162 163 /* call PBLAS subroutine */ 164 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 165 166 /* redistribute y from a row of a 2d matrix */ 167 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 168 169 } else { /* non-transpose */ 170 171 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 172 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 173 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 174 xlld = 1; 175 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 176 PetscCheckScaLapackInfo("descinit",info); 177 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 178 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 179 ylld = PetscMax(1,A->rmap->n); 180 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 181 PetscCheckScaLapackInfo("descinit",info); 182 183 /* allocate 2d vectors */ 184 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 185 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 186 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 187 ylld = PetscMax(1,lszy); 188 189 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 190 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 191 PetscCheckScaLapackInfo("descinit",info); 192 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 193 PetscCheckScaLapackInfo("descinit",info); 194 195 /* redistribute x as a row of a 2d matrix */ 196 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 197 198 /* redistribute y as a column of a 2d matrix */ 199 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 200 201 /* call PBLAS subroutine */ 202 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 203 204 /* redistribute y from a column of a 2d matrix */ 205 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 206 207 } 208 ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 213 { 214 PetscErrorCode ierr; 215 const PetscScalar *xarray; 216 PetscScalar *yarray; 217 218 PetscFunctionBegin; 219 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 220 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 221 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 222 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 223 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 228 { 229 PetscErrorCode ierr; 230 const PetscScalar *xarray; 231 PetscScalar *yarray; 232 233 PetscFunctionBegin; 234 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 235 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 236 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 238 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 243 { 244 PetscErrorCode ierr; 245 const PetscScalar *xarray; 246 PetscScalar *zarray; 247 248 PetscFunctionBegin; 249 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 250 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 251 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 252 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 253 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 254 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 259 { 260 PetscErrorCode ierr; 261 const PetscScalar *xarray; 262 PetscScalar *zarray; 263 264 PetscFunctionBegin; 265 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 266 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 267 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 268 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 269 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 270 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 275 { 276 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 277 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 278 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 279 PetscScalar sone=1.0,zero=0.0; 280 PetscBLASInt one=1; 281 282 PetscFunctionBegin; 283 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 284 C->assembled = PETSC_TRUE; 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 289 { 290 PetscErrorCode ierr; 291 292 PetscFunctionBegin; 293 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 294 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 295 ierr = MatSetUp(C);CHKERRQ(ierr); 296 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 297 PetscFunctionReturn(0); 298 } 299 300 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 301 { 302 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 303 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 304 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 305 PetscScalar sone=1.0,zero=0.0; 306 PetscBLASInt one=1; 307 308 PetscFunctionBegin; 309 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 310 C->assembled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 315 { 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 320 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 321 ierr = MatSetUp(C);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 /* --------------------------------------- */ 326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 327 { 328 PetscFunctionBegin; 329 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 330 C->ops->productsymbolic = MatProductSymbolic_AB; 331 PetscFunctionReturn(0); 332 } 333 334 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 335 { 336 PetscFunctionBegin; 337 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 338 C->ops->productsymbolic = MatProductSymbolic_ABt; 339 PetscFunctionReturn(0); 340 } 341 342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 343 { 344 PetscErrorCode ierr; 345 Mat_Product *product = C->product; 346 347 PetscFunctionBegin; 348 switch (product->type) { 349 case MATPRODUCT_AB: 350 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 351 break; 352 case MATPRODUCT_ABt: 353 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 354 break; 355 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 356 } 357 PetscFunctionReturn(0); 358 } 359 /* --------------------------------------- */ 360 361 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 362 { 363 PetscErrorCode ierr; 364 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 365 PetscScalar *darray,*d2d,v; 366 const PetscInt *ranges; 367 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 368 369 PetscFunctionBegin; 370 ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 371 372 if (A->rmap->N<=A->cmap->N) { /* row version */ 373 374 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 375 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 376 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 377 dlld = PetscMax(1,A->rmap->n); 378 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 379 PetscCheckScaLapackInfo("descinit",info); 380 381 /* allocate 2d vector */ 382 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 383 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 384 dlld = PetscMax(1,lszd); 385 386 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 387 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 388 PetscCheckScaLapackInfo("descinit",info); 389 390 /* collect diagonal */ 391 for (j=1;j<=a->M;j++) { 392 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 393 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 394 } 395 396 /* redistribute d from a column of a 2d matrix */ 397 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 398 ierr = PetscFree(d2d);CHKERRQ(ierr); 399 400 } else { /* column version */ 401 402 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 403 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 404 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 405 dlld = 1; 406 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 407 PetscCheckScaLapackInfo("descinit",info); 408 409 /* allocate 2d vector */ 410 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 411 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 412 413 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 414 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 415 PetscCheckScaLapackInfo("descinit",info); 416 417 /* collect diagonal */ 418 for (j=1;j<=a->N;j++) { 419 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 420 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 421 } 422 423 /* redistribute d from a row of a 2d matrix */ 424 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 425 ierr = PetscFree(d2d);CHKERRQ(ierr); 426 } 427 428 ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 435 { 436 PetscErrorCode ierr; 437 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 438 const PetscScalar *d; 439 const PetscInt *ranges; 440 PetscScalar *d2d; 441 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 442 443 PetscFunctionBegin; 444 if (R) { 445 ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 446 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 447 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 448 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 449 dlld = 1; 450 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 451 PetscCheckScaLapackInfo("descinit",info); 452 453 /* allocate 2d vector */ 454 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 455 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 456 457 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 458 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 459 PetscCheckScaLapackInfo("descinit",info); 460 461 /* redistribute d to a row of a 2d matrix */ 462 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 463 464 /* broadcast along process columns */ 465 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 466 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 467 468 /* local scaling */ 469 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 470 471 ierr = PetscFree(d2d);CHKERRQ(ierr); 472 ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 473 } 474 if (L) { 475 ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 476 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 477 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 478 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 479 dlld = PetscMax(1,A->rmap->n); 480 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 481 PetscCheckScaLapackInfo("descinit",info); 482 483 /* allocate 2d vector */ 484 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 485 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 486 dlld = PetscMax(1,lszd); 487 488 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 489 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 490 PetscCheckScaLapackInfo("descinit",info); 491 492 /* redistribute d to a column of a 2d matrix */ 493 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 494 495 /* broadcast along process rows */ 496 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 497 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 498 499 /* local scaling */ 500 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 501 502 ierr = PetscFree(d2d);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 504 } 505 PetscFunctionReturn(0); 506 } 507 508 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 509 { 510 PetscFunctionBegin; 511 *missing = PETSC_FALSE; 512 PetscFunctionReturn(0); 513 } 514 515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 516 { 517 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 518 PetscBLASInt n,one=1; 519 520 PetscFunctionBegin; 521 n = x->lld*x->locc; 522 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 523 PetscFunctionReturn(0); 524 } 525 526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 527 { 528 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 529 PetscBLASInt i,n; 530 PetscScalar v; 531 532 PetscFunctionBegin; 533 n = PetscMin(x->M,x->N); 534 for (i=1;i<=n;i++) { 535 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 536 v += alpha; 537 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 538 } 539 PetscFunctionReturn(0); 540 } 541 542 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 543 { 544 PetscErrorCode ierr; 545 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 546 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 547 PetscBLASInt one=1; 548 PetscScalar beta=1.0; 549 550 PetscFunctionBegin; 551 MatScaLAPACKCheckDistribution(Y,1,X,3); 552 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 553 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 558 { 559 PetscErrorCode ierr; 560 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 561 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 562 563 PetscFunctionBegin; 564 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 565 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 570 { 571 Mat Bs; 572 MPI_Comm comm; 573 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 574 PetscErrorCode ierr; 575 576 PetscFunctionBegin; 577 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 578 ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 579 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 580 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 581 b = (Mat_ScaLAPACK*)Bs->data; 582 b->M = a->M; 583 b->N = a->N; 584 b->mb = a->mb; 585 b->nb = a->nb; 586 b->rsrc = a->rsrc; 587 b->csrc = a->csrc; 588 ierr = MatSetUp(Bs);CHKERRQ(ierr); 589 *B = Bs; 590 if (op == MAT_COPY_VALUES) { 591 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 592 } 593 Bs->assembled = PETSC_TRUE; 594 PetscFunctionReturn(0); 595 } 596 597 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 598 { 599 PetscErrorCode ierr; 600 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601 Mat Bs = *B; 602 PetscBLASInt one=1; 603 PetscScalar sone=1.0,zero=0.0; 604 #if defined(PETSC_USE_COMPLEX) 605 PetscInt i,j; 606 #endif 607 608 PetscFunctionBegin; 609 if (reuse == MAT_INITIAL_MATRIX) { 610 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 611 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 612 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 613 ierr = MatSetUp(Bs);CHKERRQ(ierr); 614 *B = Bs; 615 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 616 b = (Mat_ScaLAPACK*)Bs->data; 617 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 618 #if defined(PETSC_USE_COMPLEX) 619 /* undo conjugation */ 620 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 621 #endif 622 Bs->assembled = PETSC_TRUE; 623 PetscFunctionReturn(0); 624 } 625 626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 627 { 628 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 629 PetscInt i,j; 630 631 PetscFunctionBegin; 632 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 637 { 638 PetscErrorCode ierr; 639 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 640 Mat Bs = *B; 641 PetscBLASInt one=1; 642 PetscScalar sone=1.0,zero=0.0; 643 644 PetscFunctionBegin; 645 if (reuse == MAT_INITIAL_MATRIX) { 646 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 647 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 648 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 649 ierr = MatSetUp(Bs);CHKERRQ(ierr); 650 *B = Bs; 651 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 652 b = (Mat_ScaLAPACK*)Bs->data; 653 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 654 Bs->assembled = PETSC_TRUE; 655 PetscFunctionReturn(0); 656 } 657 658 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 659 { 660 PetscErrorCode ierr; 661 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 662 PetscScalar *x,*x2d; 663 const PetscInt *ranges; 664 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 665 666 PetscFunctionBegin; 667 ierr = VecCopy(B,X);CHKERRQ(ierr); 668 ierr = VecGetArray(X,&x);CHKERRQ(ierr); 669 670 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 671 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 672 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 673 xlld = PetscMax(1,A->rmap->n); 674 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 675 PetscCheckScaLapackInfo("descinit",info); 676 677 /* allocate 2d vector */ 678 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 679 ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 680 xlld = PetscMax(1,lszx); 681 682 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 683 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 684 PetscCheckScaLapackInfo("descinit",info); 685 686 /* redistribute x as a column of a 2d matrix */ 687 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 688 689 /* call ScaLAPACK subroutine */ 690 switch (A->factortype) { 691 case MAT_FACTOR_LU: 692 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 693 PetscCheckScaLapackInfo("getrs",info); 694 break; 695 case MAT_FACTOR_CHOLESKY: 696 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 697 PetscCheckScaLapackInfo("potrs",info); 698 break; 699 default: 700 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 701 break; 702 } 703 704 /* redistribute x from a column of a 2d matrix */ 705 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 706 707 ierr = PetscFree(x2d);CHKERRQ(ierr); 708 ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 713 { 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 718 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 723 { 724 PetscErrorCode ierr; 725 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 726 PetscBool flg1,flg2; 727 PetscBLASInt one=1,info; 728 729 PetscFunctionBegin; 730 ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 731 ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 732 if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 733 MatScaLAPACKCheckDistribution(B,1,X,2); 734 b = (Mat_ScaLAPACK*)B->data; 735 x = (Mat_ScaLAPACK*)X->data; 736 ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 737 738 switch (A->factortype) { 739 case MAT_FACTOR_LU: 740 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 741 PetscCheckScaLapackInfo("getrs",info); 742 break; 743 case MAT_FACTOR_CHOLESKY: 744 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 745 PetscCheckScaLapackInfo("potrs",info); 746 break; 747 default: 748 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 749 break; 750 } 751 PetscFunctionReturn(0); 752 } 753 754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 755 { 756 PetscErrorCode ierr; 757 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 758 PetscBLASInt one=1,info; 759 760 PetscFunctionBegin; 761 if (!a->pivots) { 762 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 763 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 764 } 765 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 766 PetscCheckScaLapackInfo("getrf",info); 767 A->factortype = MAT_FACTOR_LU; 768 A->assembled = PETSC_TRUE; 769 770 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 771 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 776 { 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 781 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 782 PetscFunctionReturn(0); 783 } 784 785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 786 { 787 PetscFunctionBegin; 788 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 789 PetscFunctionReturn(0); 790 } 791 792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 793 { 794 PetscErrorCode ierr; 795 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 796 PetscBLASInt one=1,info; 797 798 PetscFunctionBegin; 799 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 800 PetscCheckScaLapackInfo("potrf",info); 801 A->factortype = MAT_FACTOR_CHOLESKY; 802 A->assembled = PETSC_TRUE; 803 804 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 805 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 809 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 810 { 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 815 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 820 { 821 PetscFunctionBegin; 822 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 823 PetscFunctionReturn(0); 824 } 825 826 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 827 { 828 PetscFunctionBegin; 829 *type = MATSOLVERSCALAPACK; 830 PetscFunctionReturn(0); 831 } 832 833 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 834 { 835 Mat B; 836 PetscErrorCode ierr; 837 838 PetscFunctionBegin; 839 /* Create the factorization matrix */ 840 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 841 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 842 ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr); 843 ierr = MatSetUp(B);CHKERRQ(ierr); 844 B->factortype = ftype; 845 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 846 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 847 848 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 849 *F = B; 850 PetscFunctionReturn(0); 851 } 852 853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 854 { 855 PetscErrorCode ierr; 856 857 PetscFunctionBegin; 858 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 859 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 864 { 865 PetscErrorCode ierr; 866 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 867 PetscBLASInt one=1,lwork=0; 868 const char *ntype; 869 PetscScalar *work,dummy; 870 871 PetscFunctionBegin; 872 switch (type){ 873 case NORM_1: 874 ntype = "1"; 875 lwork = PetscMax(a->locr,a->locc); 876 break; 877 case NORM_FROBENIUS: 878 ntype = "F"; 879 work = &dummy; 880 break; 881 case NORM_INFINITY: 882 ntype = "I"; 883 lwork = PetscMax(a->locr,a->locc); 884 break; 885 default: 886 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 887 } 888 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 889 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 890 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 891 PetscFunctionReturn(0); 892 } 893 894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 895 { 896 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 905 { 906 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 907 PetscErrorCode ierr; 908 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 909 910 PetscFunctionBegin; 911 if (rows) { 912 n = a->locr; 913 nb = a->mb; 914 isrc = a->rsrc; 915 nproc = a->grid->nprow; 916 iproc = a->grid->myrow; 917 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 918 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 920 } 921 if (cols) { 922 n = a->locc; 923 nb = a->nb; 924 isrc = a->csrc; 925 nproc = a->grid->npcol; 926 iproc = a->grid->mycol; 927 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 928 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 929 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 935 { 936 PetscErrorCode ierr; 937 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 938 Mat Bmpi; 939 MPI_Comm comm; 940 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 941 const PetscInt *ranges; 942 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 943 PetscScalar *barray; 944 945 PetscFunctionBegin; 946 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 947 948 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 949 else { 950 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 951 m = PETSC_DECIDE; 952 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 953 n = PETSC_DECIDE; 954 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 955 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 956 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 957 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 958 } 959 960 /* create ScaLAPACK descriptor for B (1d block distribution) */ 961 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 962 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 963 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 964 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 965 PetscCheckScaLapackInfo("descinit",info); 966 967 /* redistribute matrix */ 968 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 969 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 970 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 971 972 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 974 if (reuse == MAT_INPLACE_MATRIX) { 975 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 976 } else *B = Bmpi; 977 PetscFunctionReturn(0); 978 } 979 980 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 981 { 982 PetscErrorCode ierr; 983 Mat_ScaLAPACK *b; 984 Mat Bmpi; 985 MPI_Comm comm; 986 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 987 const PetscInt *ranges; 988 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 989 PetscScalar *aarray; 990 PetscInt lda; 991 992 PetscFunctionBegin; 993 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 994 995 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 996 else { 997 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 998 m = PETSC_DECIDE; 999 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1000 n = PETSC_DECIDE; 1001 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1002 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1003 ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 1004 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1005 } 1006 b = (Mat_ScaLAPACK*)Bmpi->data; 1007 1008 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1009 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1010 ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1011 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1012 lld = PetscMax(lda,1); /* local leading dimension */ 1013 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1014 PetscCheckScaLapackInfo("descinit",info); 1015 1016 /* redistribute matrix */ 1017 ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1018 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1019 ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1020 1021 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1022 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 if (reuse == MAT_INPLACE_MATRIX) { 1024 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1025 } else *B = Bmpi; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1030 { 1031 Mat mat_scal; 1032 PetscErrorCode ierr; 1033 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1034 const PetscInt *cols; 1035 const PetscScalar *vals; 1036 1037 PetscFunctionBegin; 1038 if (reuse == MAT_REUSE_MATRIX) { 1039 mat_scal = *newmat; 1040 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1041 } else { 1042 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1043 m = PETSC_DECIDE; 1044 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1045 n = PETSC_DECIDE; 1046 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1047 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1048 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1049 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1050 } 1051 for (row=rstart;row<rend;row++) { 1052 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1053 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1054 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1055 } 1056 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1057 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1058 1059 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1060 else *newmat = mat_scal; 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1065 { 1066 Mat mat_scal; 1067 PetscErrorCode ierr; 1068 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1069 const PetscInt *cols; 1070 const PetscScalar *vals; 1071 PetscScalar v; 1072 1073 PetscFunctionBegin; 1074 if (reuse == MAT_REUSE_MATRIX) { 1075 mat_scal = *newmat; 1076 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1077 } else { 1078 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1079 m = PETSC_DECIDE; 1080 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1081 n = PETSC_DECIDE; 1082 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1083 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1084 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1085 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1086 } 1087 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1088 for (row=rstart;row<rend;row++) { 1089 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1090 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1091 for (j=0;j<ncols;j++) { /* lower triangular part */ 1092 if (cols[j] == row) continue; 1093 v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1094 ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1095 } 1096 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1097 } 1098 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1099 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1100 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1101 1102 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1103 else *newmat = mat_scal; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1108 { 1109 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1110 PetscErrorCode ierr; 1111 PetscInt sz=0; 1112 1113 PetscFunctionBegin; 1114 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1115 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1116 if (!a->lld) a->lld = a->locr; 1117 1118 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1119 ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1120 ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1121 ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1122 1123 A->preallocated = PETSC_TRUE; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1128 { 1129 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1130 PetscErrorCode ierr; 1131 Mat_ScaLAPACK_Grid *grid; 1132 PetscBool flg; 1133 MPI_Comm icomm; 1134 1135 PetscFunctionBegin; 1136 ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1137 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1138 ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1139 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1140 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1141 if (--grid->grid_refct == 0) { 1142 Cblacs_gridexit(grid->ictxt); 1143 Cblacs_gridexit(grid->ictxrow); 1144 Cblacs_gridexit(grid->ictxcol); 1145 ierr = PetscFree(grid);CHKERRQ(ierr); 1146 ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr); 1147 } 1148 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscFree(A->data);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1158 { 1159 PetscErrorCode ierr; 1160 const PetscInt *ranges; 1161 PetscMPIInt size; 1162 PetscInt i,n; 1163 1164 PetscFunctionBegin; 1165 ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr); 1166 if (size>2) { 1167 ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1168 n = ranges[1]-ranges[0]; 1169 for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1170 if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1176 { 1177 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1178 PetscErrorCode ierr; 1179 PetscBLASInt info=0; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1183 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1184 1185 /* check that the layout is as enforced by MatCreateScaLAPACK */ 1186 ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1187 ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1188 1189 /* compute local sizes */ 1190 ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1191 ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1192 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1193 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1194 a->lld = PetscMax(1,a->locr); 1195 1196 /* allocate local array */ 1197 ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1198 1199 /* set up ScaLAPACK descriptor */ 1200 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1201 PetscCheckScaLapackInfo("descinit",info); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1206 { 1207 PetscErrorCode ierr; 1208 PetscInt nstash,reallocs; 1209 1210 PetscFunctionBegin; 1211 if (A->nooffprocentries) PetscFunctionReturn(0); 1212 ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1213 ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1214 ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1219 { 1220 PetscErrorCode ierr; 1221 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1222 PetscMPIInt n; 1223 PetscInt i,flg,*row,*col; 1224 PetscScalar *val; 1225 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1226 1227 PetscFunctionBegin; 1228 if (A->nooffprocentries) PetscFunctionReturn(0); 1229 while (1) { 1230 ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1231 if (!flg) break; 1232 for (i=0;i<n;i++) { 1233 ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1234 ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1235 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1236 if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process"); 1237 switch (A->insertmode) { 1238 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1239 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1240 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1241 } 1242 } 1243 } 1244 ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1249 { 1250 PetscErrorCode ierr; 1251 Mat Adense,As; 1252 MPI_Comm comm; 1253 1254 PetscFunctionBegin; 1255 ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1256 ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1257 ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1258 ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1259 ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1260 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1261 ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 /* -------------------------------------------------------------------*/ 1266 static struct _MatOps MatOps_Values = { 1267 MatSetValues_ScaLAPACK, 1268 0, 1269 0, 1270 MatMult_ScaLAPACK, 1271 /* 4*/ MatMultAdd_ScaLAPACK, 1272 MatMultTranspose_ScaLAPACK, 1273 MatMultTransposeAdd_ScaLAPACK, 1274 MatSolve_ScaLAPACK, 1275 MatSolveAdd_ScaLAPACK, 1276 0, 1277 /*10*/ 0, 1278 MatLUFactor_ScaLAPACK, 1279 MatCholeskyFactor_ScaLAPACK, 1280 0, 1281 MatTranspose_ScaLAPACK, 1282 /*15*/ MatGetInfo_ScaLAPACK, 1283 0, 1284 MatGetDiagonal_ScaLAPACK, 1285 MatDiagonalScale_ScaLAPACK, 1286 MatNorm_ScaLAPACK, 1287 /*20*/ MatAssemblyBegin_ScaLAPACK, 1288 MatAssemblyEnd_ScaLAPACK, 1289 MatSetOption_ScaLAPACK, 1290 MatZeroEntries_ScaLAPACK, 1291 /*24*/ 0, 1292 MatLUFactorSymbolic_ScaLAPACK, 1293 MatLUFactorNumeric_ScaLAPACK, 1294 MatCholeskyFactorSymbolic_ScaLAPACK, 1295 MatCholeskyFactorNumeric_ScaLAPACK, 1296 /*29*/ MatSetUp_ScaLAPACK, 1297 0, 1298 0, 1299 0, 1300 0, 1301 /*34*/ MatDuplicate_ScaLAPACK, 1302 0, 1303 0, 1304 0, 1305 0, 1306 /*39*/ MatAXPY_ScaLAPACK, 1307 0, 1308 0, 1309 0, 1310 MatCopy_ScaLAPACK, 1311 /*44*/ 0, 1312 MatScale_ScaLAPACK, 1313 MatShift_ScaLAPACK, 1314 0, 1315 0, 1316 /*49*/ 0, 1317 0, 1318 0, 1319 0, 1320 0, 1321 /*54*/ 0, 1322 0, 1323 0, 1324 0, 1325 0, 1326 /*59*/ 0, 1327 MatDestroy_ScaLAPACK, 1328 MatView_ScaLAPACK, 1329 0, 1330 0, 1331 /*64*/ 0, 1332 0, 1333 0, 1334 0, 1335 0, 1336 /*69*/ 0, 1337 0, 1338 MatConvert_ScaLAPACK_Dense, 1339 0, 1340 0, 1341 /*74*/ 0, 1342 0, 1343 0, 1344 0, 1345 0, 1346 /*79*/ 0, 1347 0, 1348 0, 1349 0, 1350 MatLoad_ScaLAPACK, 1351 /*84*/ 0, 1352 0, 1353 0, 1354 0, 1355 0, 1356 /*89*/ 0, 1357 0, 1358 MatMatMultNumeric_ScaLAPACK, 1359 0, 1360 0, 1361 /*94*/ 0, 1362 0, 1363 0, 1364 MatMatTransposeMultNumeric_ScaLAPACK, 1365 0, 1366 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1367 0, 1368 0, 1369 MatConjugate_ScaLAPACK, 1370 0, 1371 /*104*/0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 /*109*/MatMatSolve_ScaLAPACK, 1377 0, 1378 0, 1379 0, 1380 MatMissingDiagonal_ScaLAPACK, 1381 /*114*/0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 /*119*/0, 1387 MatHermitianTranspose_ScaLAPACK, 1388 0, 1389 0, 1390 0, 1391 /*124*/0, 1392 0, 1393 0, 1394 0, 1395 0, 1396 /*129*/0, 1397 0, 1398 0, 1399 0, 1400 0, 1401 /*134*/0, 1402 0, 1403 0, 1404 0, 1405 0, 1406 0, 1407 /*140*/0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /*145*/0, 1413 0, 1414 0 1415 }; 1416 1417 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1418 { 1419 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1420 PetscInt size=stash->size,nsends; 1421 PetscErrorCode ierr; 1422 PetscInt count,*sindices,**rindices,i,j,l; 1423 PetscScalar **rvalues,*svalues; 1424 MPI_Comm comm = stash->comm; 1425 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1426 PetscMPIInt *sizes,*nlengths,nreceives; 1427 PetscInt *sp_idx,*sp_idy; 1428 PetscScalar *sp_val; 1429 PetscMatStashSpace space,space_next; 1430 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1431 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1432 1433 PetscFunctionBegin; 1434 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1435 InsertMode addv; 1436 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1437 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1438 mat->insertmode = addv; /* in case this processor had no cache */ 1439 } 1440 1441 bs2 = stash->bs*stash->bs; 1442 1443 /* first count number of contributors to each processor */ 1444 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1445 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1446 1447 i = j = 0; 1448 space = stash->space_head; 1449 while (space) { 1450 space_next = space->next; 1451 for (l=0; l<space->local_used; l++) { 1452 ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1453 ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1454 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1455 j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1456 nlengths[j]++; owner[i] = j; 1457 i++; 1458 } 1459 space = space_next; 1460 } 1461 1462 /* Now check what procs get messages - and compute nsends. */ 1463 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1464 for (i=0, nsends=0; i<size; i++) { 1465 if (nlengths[i]) { 1466 sizes[i] = 1; nsends++; 1467 } 1468 } 1469 1470 {PetscMPIInt *onodes,*olengths; 1471 /* Determine the number of messages to expect, their lengths, from from-ids */ 1472 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1473 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1474 /* since clubbing row,col - lengths are multiplied by 2 */ 1475 for (i=0; i<nreceives; i++) olengths[i] *=2; 1476 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1477 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1478 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1479 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1480 ierr = PetscFree(onodes);CHKERRQ(ierr); 1481 ierr = PetscFree(olengths);CHKERRQ(ierr);} 1482 1483 /* do sends: 1484 1) starts[i] gives the starting index in svalues for stuff going to 1485 the ith processor 1486 */ 1487 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1488 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1489 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1490 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1491 startv[0] = 0; starti[0] = 0; 1492 for (i=1; i<size; i++) { 1493 startv[i] = startv[i-1] + nlengths[i-1]; 1494 starti[i] = starti[i-1] + 2*nlengths[i-1]; 1495 } 1496 1497 i = 0; 1498 space = stash->space_head; 1499 while (space) { 1500 space_next = space->next; 1501 sp_idx = space->idx; 1502 sp_idy = space->idy; 1503 sp_val = space->val; 1504 for (l=0; l<space->local_used; l++) { 1505 j = owner[i]; 1506 if (bs2 == 1) { 1507 svalues[startv[j]] = sp_val[l]; 1508 } else { 1509 PetscInt k; 1510 PetscScalar *buf1,*buf2; 1511 buf1 = svalues+bs2*startv[j]; 1512 buf2 = space->val + bs2*l; 1513 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1514 } 1515 sindices[starti[j]] = sp_idx[l]; 1516 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1517 startv[j]++; 1518 starti[j]++; 1519 i++; 1520 } 1521 space = space_next; 1522 } 1523 startv[0] = 0; 1524 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1525 1526 for (i=0,count=0; i<size; i++) { 1527 if (sizes[i]) { 1528 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 1529 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 1530 } 1531 } 1532 #if defined(PETSC_USE_INFO) 1533 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1534 for (i=0; i<size; i++) { 1535 if (sizes[i]) { 1536 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1537 } 1538 } 1539 #endif 1540 ierr = PetscFree(nlengths);CHKERRQ(ierr); 1541 ierr = PetscFree(owner);CHKERRQ(ierr); 1542 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1543 ierr = PetscFree(sizes);CHKERRQ(ierr); 1544 1545 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1546 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1547 1548 for (i=0; i<nreceives; i++) { 1549 recv_waits[2*i] = recv_waits1[i]; 1550 recv_waits[2*i+1] = recv_waits2[i]; 1551 } 1552 stash->recv_waits = recv_waits; 1553 1554 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1555 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1556 1557 stash->svalues = svalues; 1558 stash->sindices = sindices; 1559 stash->rvalues = rvalues; 1560 stash->rindices = rindices; 1561 stash->send_waits = send_waits; 1562 stash->nsends = nsends; 1563 stash->nrecvs = nreceives; 1564 stash->reproduce_count = 0; 1565 PetscFunctionReturn(0); 1566 } 1567 1568 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1569 { 1570 PetscErrorCode ierr; 1571 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1572 1573 PetscFunctionBegin; 1574 if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1575 if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1576 if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1577 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1578 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 /*@ 1583 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1584 the ScaLAPACK matrix 1585 1586 Logically Collective on A 1587 1588 Input Parameter: 1589 + A - a MATSCALAPACK matrix 1590 . mb - the row block size 1591 - nb - the column block size 1592 1593 Level: intermediate 1594 1595 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1596 @*/ 1597 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1598 { 1599 PetscErrorCode ierr; 1600 1601 PetscFunctionBegin; 1602 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1603 PetscValidLogicalCollectiveInt(A,mb,2); 1604 PetscValidLogicalCollectiveInt(A,nb,3); 1605 ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1610 { 1611 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1612 1613 PetscFunctionBegin; 1614 if (mb) *mb = a->mb; 1615 if (nb) *nb = a->nb; 1616 PetscFunctionReturn(0); 1617 } 1618 1619 /*@ 1620 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1621 the ScaLAPACK matrix 1622 1623 Not collective 1624 1625 Input Parameter: 1626 . A - a MATSCALAPACK matrix 1627 1628 Output Parameters: 1629 + mb - the row block size 1630 - nb - the column block size 1631 1632 Level: intermediate 1633 1634 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1635 @*/ 1636 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1637 { 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1642 ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1643 PetscFunctionReturn(0); 1644 } 1645 1646 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1647 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1648 1649 /*MC 1650 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1651 1652 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1653 1654 Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1655 1656 Options Database Keys: 1657 + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1658 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1659 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1660 1661 Level: beginner 1662 1663 .seealso: MATDENSE, MATELEMENTAL 1664 M*/ 1665 1666 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1667 { 1668 Mat_ScaLAPACK *a; 1669 PetscErrorCode ierr; 1670 PetscBool flg,flg1; 1671 Mat_ScaLAPACK_Grid *grid; 1672 MPI_Comm icomm; 1673 PetscBLASInt nprow,npcol,myrow,mycol; 1674 PetscInt optv1,k=2,array[2]={0,0}; 1675 PetscMPIInt size; 1676 1677 PetscFunctionBegin; 1678 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1679 A->insertmode = NOT_SET_VALUES; 1680 1681 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1682 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1683 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1684 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1685 A->stash.ScatterDestroy = NULL; 1686 1687 ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1688 A->data = (void*)a; 1689 1690 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1691 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1692 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr); 1693 } 1694 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1695 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1696 if (!flg) { 1697 ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1698 1699 ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr); 1700 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1701 1702 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1703 ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1704 if (flg1) { 1705 if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1706 grid->nprow = optv1; 1707 } 1708 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1709 1710 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1711 grid->npcol = size/grid->nprow; 1712 ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1713 ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1714 Cblacs_get(-1,0,&grid->ictxt); 1715 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1716 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1717 grid->grid_refct = 1; 1718 grid->nprow = nprow; 1719 grid->npcol = npcol; 1720 grid->myrow = myrow; 1721 grid->mycol = mycol; 1722 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1723 Cblacs_get(-1,0,&grid->ictxrow); 1724 Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1725 Cblacs_get(-1,0,&grid->ictxcol); 1726 Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1727 ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr); 1728 1729 } else grid->grid_refct++; 1730 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1731 a->grid = grid; 1732 a->mb = DEFAULT_BLOCKSIZE; 1733 a->nb = DEFAULT_BLOCKSIZE; 1734 1735 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1736 ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1737 if (flg) { 1738 a->mb = array[0]; 1739 a->nb = (k>1)? array[1]: a->mb; 1740 } 1741 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1742 1743 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1744 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1745 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1746 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1747 PetscFunctionReturn(0); 1748 } 1749 1750 /*@C 1751 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1752 (2D block cyclic distribution). 1753 1754 Collective 1755 1756 Input Parameters: 1757 + comm - MPI communicator 1758 . mb - row block size (or PETSC_DECIDE to have it set) 1759 . nb - column block size (or PETSC_DECIDE to have it set) 1760 . M - number of global rows 1761 . N - number of global columns 1762 . rsrc - coordinate of process that owns the first row of the distributed matrix 1763 - csrc - coordinate of process that owns the first column of the distributed matrix 1764 1765 Output Parameter: 1766 . A - the matrix 1767 1768 Options Database Keys: 1769 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1770 1771 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1772 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1773 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1774 1775 Notes: 1776 If PETSC_DECIDE is used for the block sizes, then an appropriate value 1777 is chosen. 1778 1779 Storage Information: 1780 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1781 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1782 significance and are thus ignored. The block sizes refer to the values 1783 used for the distributed matrix, not the same meaning as in BAIJ. 1784 1785 Level: intermediate 1786 1787 .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1788 @*/ 1789 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1790 { 1791 PetscErrorCode ierr; 1792 Mat_ScaLAPACK *a; 1793 PetscInt m,n; 1794 1795 PetscFunctionBegin; 1796 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1797 ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1798 if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1799 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1800 m = PETSC_DECIDE; 1801 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1802 n = PETSC_DECIDE; 1803 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1804 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1805 a = (Mat_ScaLAPACK*)(*A)->data; 1806 ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1807 ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1808 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1809 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1810 ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1811 ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1812 ierr = MatSetUp(*A);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816