1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 #define DEFAULT_BLOCKSIZE 64 4 5 /* 6 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 7 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 8 */ 9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 10 11 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 12 { 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr); 17 ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr); 18 PetscFunctionReturn(0); 19 } 20 21 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 22 { 23 PetscErrorCode ierr; 24 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 25 PetscBool iascii; 26 PetscViewerFormat format; 27 Mat Adense; 28 29 PetscFunctionBegin; 30 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 31 if (iascii) { 32 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 33 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 34 ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 36 ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 37 ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 38 PetscFunctionReturn(0); 39 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 40 PetscFunctionReturn(0); 41 } 42 } 43 /* convert to dense format and call MatView() */ 44 ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 45 ierr = MatView(Adense,viewer);CHKERRQ(ierr); 46 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 51 { 52 PetscErrorCode ierr; 53 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 54 PetscLogDouble isend[2],irecv[2]; 55 56 PetscFunctionBegin; 57 info->block_size = 1.0; 58 59 isend[0] = a->lld*a->locc; /* locally allocated */ 60 isend[1] = a->locr*a->locc; /* used submatrix */ 61 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 62 info->nz_allocated = isend[0]; 63 info->nz_used = isend[1]; 64 } else if (flag == MAT_GLOBAL_MAX) { 65 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 66 info->nz_allocated = irecv[0]; 67 info->nz_used = irecv[1]; 68 } else if (flag == MAT_GLOBAL_SUM) { 69 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 70 info->nz_allocated = irecv[0]; 71 info->nz_used = irecv[1]; 72 } 73 74 info->nz_unneeded = 0; 75 info->assemblies = A->num_ass; 76 info->mallocs = 0; 77 info->memory = ((PetscObject)A)->mem; 78 info->fill_ratio_given = 0; 79 info->fill_ratio_needed = 0; 80 info->factor_mallocs = 0; 81 PetscFunctionReturn(0); 82 } 83 84 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 85 { 86 PetscFunctionBegin; 87 switch (op) { 88 case MAT_NEW_NONZERO_LOCATIONS: 89 case MAT_NEW_NONZERO_LOCATION_ERR: 90 case MAT_NEW_NONZERO_ALLOCATION_ERR: 91 case MAT_SYMMETRIC: 92 case MAT_SORTED_FULL: 93 case MAT_HERMITIAN: 94 break; 95 default: 96 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 102 { 103 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 104 PetscErrorCode ierr; 105 PetscInt i,j; 106 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 107 108 PetscFunctionBegin; 109 for (i=0;i<nr;i++) { 110 if (rows[i] < 0) continue; 111 ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 112 for (j=0;j<nc;j++) { 113 if (cols[j] < 0) continue; 114 ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 115 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 116 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 117 switch (imode) { 118 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 119 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 120 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 121 } 122 } else { 123 if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 124 A->assembled = PETSC_FALSE; 125 ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 126 } 127 } 128 } 129 PetscFunctionReturn(0); 130 } 131 132 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 133 { 134 PetscErrorCode ierr; 135 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 136 PetscScalar *x2d,*y2d,alpha=1.0; 137 const PetscInt *ranges; 138 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 139 140 PetscFunctionBegin; 141 if (transpose) { 142 143 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 144 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 145 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 146 xlld = PetscMax(1,A->rmap->n); 147 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 148 PetscCheckScaLapackInfo("descinit",info); 149 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 150 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 151 ylld = 1; 152 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 153 PetscCheckScaLapackInfo("descinit",info); 154 155 /* allocate 2d vectors */ 156 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 157 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 158 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 159 xlld = PetscMax(1,lszx); 160 161 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 162 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 163 PetscCheckScaLapackInfo("descinit",info); 164 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 165 PetscCheckScaLapackInfo("descinit",info); 166 167 /* redistribute x as a column of a 2d matrix */ 168 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 169 170 /* redistribute y as a row of a 2d matrix */ 171 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 172 173 /* call PBLAS subroutine */ 174 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 175 176 /* redistribute y from a row of a 2d matrix */ 177 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 178 179 } else { /* non-transpose */ 180 181 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 182 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 183 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 184 xlld = 1; 185 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 186 PetscCheckScaLapackInfo("descinit",info); 187 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 188 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 189 ylld = PetscMax(1,A->rmap->n); 190 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 191 PetscCheckScaLapackInfo("descinit",info); 192 193 /* allocate 2d vectors */ 194 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 195 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 196 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 197 ylld = PetscMax(1,lszy); 198 199 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 200 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 201 PetscCheckScaLapackInfo("descinit",info); 202 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 203 PetscCheckScaLapackInfo("descinit",info); 204 205 /* redistribute x as a row of a 2d matrix */ 206 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 207 208 /* redistribute y as a column of a 2d matrix */ 209 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 210 211 /* call PBLAS subroutine */ 212 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 213 214 /* redistribute y from a column of a 2d matrix */ 215 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 216 217 } 218 ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 223 { 224 PetscErrorCode ierr; 225 const PetscScalar *xarray; 226 PetscScalar *yarray; 227 228 PetscFunctionBegin; 229 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 230 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 231 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 232 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 233 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 238 { 239 PetscErrorCode ierr; 240 const PetscScalar *xarray; 241 PetscScalar *yarray; 242 243 PetscFunctionBegin; 244 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 245 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 246 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 247 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 248 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 253 { 254 PetscErrorCode ierr; 255 const PetscScalar *xarray; 256 PetscScalar *zarray; 257 258 PetscFunctionBegin; 259 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 260 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 262 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 263 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 264 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 269 { 270 PetscErrorCode ierr; 271 const PetscScalar *xarray; 272 PetscScalar *zarray; 273 274 PetscFunctionBegin; 275 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 276 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 277 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 278 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 279 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 280 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 285 { 286 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 287 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 288 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 289 PetscScalar sone=1.0,zero=0.0; 290 PetscBLASInt one=1; 291 292 PetscFunctionBegin; 293 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 294 C->assembled = PETSC_TRUE; 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 304 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 305 ierr = MatSetUp(C);CHKERRQ(ierr); 306 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 307 PetscFunctionReturn(0); 308 } 309 310 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 311 { 312 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 313 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 314 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 315 PetscScalar sone=1.0,zero=0.0; 316 PetscBLASInt one=1; 317 318 PetscFunctionBegin; 319 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 320 C->assembled = PETSC_TRUE; 321 PetscFunctionReturn(0); 322 } 323 324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 325 { 326 PetscErrorCode ierr; 327 328 PetscFunctionBegin; 329 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 330 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 331 ierr = MatSetUp(C);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 /* --------------------------------------- */ 336 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 337 { 338 PetscFunctionBegin; 339 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 340 C->ops->productsymbolic = MatProductSymbolic_AB; 341 PetscFunctionReturn(0); 342 } 343 344 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 345 { 346 PetscFunctionBegin; 347 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 348 C->ops->productsymbolic = MatProductSymbolic_ABt; 349 PetscFunctionReturn(0); 350 } 351 352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 353 { 354 PetscErrorCode ierr; 355 Mat_Product *product = C->product; 356 357 PetscFunctionBegin; 358 switch (product->type) { 359 case MATPRODUCT_AB: 360 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 361 break; 362 case MATPRODUCT_ABt: 363 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 364 break; 365 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 366 } 367 PetscFunctionReturn(0); 368 } 369 /* --------------------------------------- */ 370 371 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 372 { 373 PetscErrorCode ierr; 374 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 375 PetscScalar *darray,*d2d,v; 376 const PetscInt *ranges; 377 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 378 379 PetscFunctionBegin; 380 ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 381 382 if (A->rmap->N<=A->cmap->N) { /* row version */ 383 384 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 385 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 386 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 387 dlld = PetscMax(1,A->rmap->n); 388 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 389 PetscCheckScaLapackInfo("descinit",info); 390 391 /* allocate 2d vector */ 392 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 393 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 394 dlld = PetscMax(1,lszd); 395 396 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 397 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 398 PetscCheckScaLapackInfo("descinit",info); 399 400 /* collect diagonal */ 401 for (j=1;j<=a->M;j++) { 402 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 403 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 404 } 405 406 /* redistribute d from a column of a 2d matrix */ 407 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 408 ierr = PetscFree(d2d);CHKERRQ(ierr); 409 410 } else { /* column version */ 411 412 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 413 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 414 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 415 dlld = 1; 416 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 417 PetscCheckScaLapackInfo("descinit",info); 418 419 /* allocate 2d vector */ 420 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 421 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 422 423 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 424 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 425 PetscCheckScaLapackInfo("descinit",info); 426 427 /* collect diagonal */ 428 for (j=1;j<=a->N;j++) { 429 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 430 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 431 } 432 433 /* redistribute d from a row of a 2d matrix */ 434 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 435 ierr = PetscFree(d2d);CHKERRQ(ierr); 436 } 437 438 ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 439 ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 445 { 446 PetscErrorCode ierr; 447 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 448 const PetscScalar *d; 449 const PetscInt *ranges; 450 PetscScalar *d2d; 451 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 452 453 PetscFunctionBegin; 454 if (R) { 455 ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 456 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 457 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 458 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 459 dlld = 1; 460 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 461 PetscCheckScaLapackInfo("descinit",info); 462 463 /* allocate 2d vector */ 464 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 465 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 466 467 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 468 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 469 PetscCheckScaLapackInfo("descinit",info); 470 471 /* redistribute d to a row of a 2d matrix */ 472 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 473 474 /* broadcast along process columns */ 475 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 476 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 477 478 /* local scaling */ 479 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 480 481 ierr = PetscFree(d2d);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 483 } 484 if (L) { 485 ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 486 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 487 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 488 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 489 dlld = PetscMax(1,A->rmap->n); 490 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 491 PetscCheckScaLapackInfo("descinit",info); 492 493 /* allocate 2d vector */ 494 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 495 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 496 dlld = PetscMax(1,lszd); 497 498 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 499 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 500 PetscCheckScaLapackInfo("descinit",info); 501 502 /* redistribute d to a column of a 2d matrix */ 503 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 504 505 /* broadcast along process rows */ 506 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 507 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 508 509 /* local scaling */ 510 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 511 512 ierr = PetscFree(d2d);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 514 } 515 PetscFunctionReturn(0); 516 } 517 518 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 519 { 520 PetscFunctionBegin; 521 *missing = PETSC_FALSE; 522 PetscFunctionReturn(0); 523 } 524 525 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 526 { 527 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 528 PetscBLASInt n,one=1; 529 530 PetscFunctionBegin; 531 n = x->lld*x->locc; 532 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 533 PetscFunctionReturn(0); 534 } 535 536 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 537 { 538 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 539 PetscBLASInt i,n; 540 PetscScalar v; 541 542 PetscFunctionBegin; 543 n = PetscMin(x->M,x->N); 544 for (i=1;i<=n;i++) { 545 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 546 v += alpha; 547 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 548 } 549 PetscFunctionReturn(0); 550 } 551 552 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 553 { 554 PetscErrorCode ierr; 555 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 556 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 557 PetscBLASInt one=1; 558 PetscScalar beta=1.0; 559 560 PetscFunctionBegin; 561 MatScaLAPACKCheckDistribution(Y,1,X,3); 562 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 563 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 568 { 569 PetscErrorCode ierr; 570 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 571 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 572 573 PetscFunctionBegin; 574 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 575 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 580 { 581 Mat Bs; 582 MPI_Comm comm; 583 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 584 PetscErrorCode ierr; 585 586 PetscFunctionBegin; 587 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 588 ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 589 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 590 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 591 b = (Mat_ScaLAPACK*)Bs->data; 592 b->M = a->M; 593 b->N = a->N; 594 b->mb = a->mb; 595 b->nb = a->nb; 596 b->rsrc = a->rsrc; 597 b->csrc = a->csrc; 598 ierr = MatSetUp(Bs);CHKERRQ(ierr); 599 *B = Bs; 600 if (op == MAT_COPY_VALUES) { 601 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 602 } 603 Bs->assembled = PETSC_TRUE; 604 PetscFunctionReturn(0); 605 } 606 607 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 608 { 609 PetscErrorCode ierr; 610 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 611 Mat Bs = *B; 612 PetscBLASInt one=1; 613 PetscScalar sone=1.0,zero=0.0; 614 #if defined(PETSC_USE_COMPLEX) 615 PetscInt i,j; 616 #endif 617 618 PetscFunctionBegin; 619 if (reuse == MAT_INITIAL_MATRIX) { 620 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 621 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 622 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 623 ierr = MatSetUp(Bs);CHKERRQ(ierr); 624 *B = Bs; 625 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 626 b = (Mat_ScaLAPACK*)Bs->data; 627 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 628 #if defined(PETSC_USE_COMPLEX) 629 /* undo conjugation */ 630 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 631 #endif 632 Bs->assembled = PETSC_TRUE; 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 637 { 638 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 639 PetscInt i,j; 640 641 PetscFunctionBegin; 642 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 643 PetscFunctionReturn(0); 644 } 645 646 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 647 { 648 PetscErrorCode ierr; 649 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 650 Mat Bs = *B; 651 PetscBLASInt one=1; 652 PetscScalar sone=1.0,zero=0.0; 653 654 PetscFunctionBegin; 655 if (reuse == MAT_INITIAL_MATRIX) { 656 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 657 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 658 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 659 ierr = MatSetUp(Bs);CHKERRQ(ierr); 660 *B = Bs; 661 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 662 b = (Mat_ScaLAPACK*)Bs->data; 663 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 664 Bs->assembled = PETSC_TRUE; 665 PetscFunctionReturn(0); 666 } 667 668 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 669 { 670 PetscErrorCode ierr; 671 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 672 PetscScalar *x,*x2d; 673 const PetscInt *ranges; 674 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 675 676 PetscFunctionBegin; 677 ierr = VecCopy(B,X);CHKERRQ(ierr); 678 ierr = VecGetArray(X,&x);CHKERRQ(ierr); 679 680 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 681 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 682 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 683 xlld = PetscMax(1,A->rmap->n); 684 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 685 PetscCheckScaLapackInfo("descinit",info); 686 687 /* allocate 2d vector */ 688 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 689 ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 690 xlld = PetscMax(1,lszx); 691 692 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 693 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 694 PetscCheckScaLapackInfo("descinit",info); 695 696 /* redistribute x as a column of a 2d matrix */ 697 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 698 699 /* call ScaLAPACK subroutine */ 700 switch (A->factortype) { 701 case MAT_FACTOR_LU: 702 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 703 PetscCheckScaLapackInfo("getrs",info); 704 break; 705 case MAT_FACTOR_CHOLESKY: 706 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 707 PetscCheckScaLapackInfo("potrs",info); 708 break; 709 default: 710 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 711 break; 712 } 713 714 /* redistribute x from a column of a 2d matrix */ 715 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 716 717 ierr = PetscFree(x2d);CHKERRQ(ierr); 718 ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 723 { 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 728 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 733 { 734 PetscErrorCode ierr; 735 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 736 PetscBool flg1,flg2; 737 PetscBLASInt one=1,info; 738 739 PetscFunctionBegin; 740 ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 741 ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 742 if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 743 MatScaLAPACKCheckDistribution(B,1,X,2); 744 b = (Mat_ScaLAPACK*)B->data; 745 x = (Mat_ScaLAPACK*)X->data; 746 ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 747 748 switch (A->factortype) { 749 case MAT_FACTOR_LU: 750 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 751 PetscCheckScaLapackInfo("getrs",info); 752 break; 753 case MAT_FACTOR_CHOLESKY: 754 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 755 PetscCheckScaLapackInfo("potrs",info); 756 break; 757 default: 758 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 759 break; 760 } 761 PetscFunctionReturn(0); 762 } 763 764 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 765 { 766 PetscErrorCode ierr; 767 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 768 PetscBLASInt one=1,info; 769 770 PetscFunctionBegin; 771 if (!a->pivots) { 772 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 773 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 774 } 775 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 776 PetscCheckScaLapackInfo("getrf",info); 777 A->factortype = MAT_FACTOR_LU; 778 A->assembled = PETSC_TRUE; 779 780 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 781 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 782 PetscFunctionReturn(0); 783 } 784 785 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 786 { 787 PetscErrorCode ierr; 788 789 PetscFunctionBegin; 790 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 791 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 796 { 797 PetscFunctionBegin; 798 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 799 PetscFunctionReturn(0); 800 } 801 802 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 803 { 804 PetscErrorCode ierr; 805 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 806 PetscBLASInt one=1,info; 807 808 PetscFunctionBegin; 809 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 810 PetscCheckScaLapackInfo("potrf",info); 811 A->factortype = MAT_FACTOR_CHOLESKY; 812 A->assembled = PETSC_TRUE; 813 814 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 815 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 820 { 821 PetscErrorCode ierr; 822 823 PetscFunctionBegin; 824 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 825 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 830 { 831 PetscFunctionBegin; 832 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 833 PetscFunctionReturn(0); 834 } 835 836 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 837 { 838 PetscFunctionBegin; 839 *type = MATSOLVERSCALAPACK; 840 PetscFunctionReturn(0); 841 } 842 843 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 844 { 845 Mat B; 846 PetscErrorCode ierr; 847 848 PetscFunctionBegin; 849 /* Create the factorization matrix */ 850 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 851 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 852 ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr); 853 ierr = MatSetUp(B);CHKERRQ(ierr); 854 B->factortype = ftype; 855 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 856 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 857 858 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 859 *F = B; 860 PetscFunctionReturn(0); 861 } 862 863 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 864 { 865 PetscErrorCode ierr; 866 867 PetscFunctionBegin; 868 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 869 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 874 { 875 PetscErrorCode ierr; 876 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 877 PetscBLASInt one=1,lwork=0; 878 const char *ntype; 879 PetscScalar *work=NULL,dummy; 880 881 PetscFunctionBegin; 882 switch (type){ 883 case NORM_1: 884 ntype = "1"; 885 lwork = PetscMax(a->locr,a->locc); 886 break; 887 case NORM_FROBENIUS: 888 ntype = "F"; 889 work = &dummy; 890 break; 891 case NORM_INFINITY: 892 ntype = "I"; 893 lwork = PetscMax(a->locr,a->locc); 894 break; 895 default: 896 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 897 } 898 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 899 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 900 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 901 PetscFunctionReturn(0); 902 } 903 904 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 905 { 906 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 907 PetscErrorCode ierr; 908 909 PetscFunctionBegin; 910 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 915 { 916 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 917 PetscErrorCode ierr; 918 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 919 920 PetscFunctionBegin; 921 if (rows) { 922 n = a->locr; 923 nb = a->mb; 924 isrc = a->rsrc; 925 nproc = a->grid->nprow; 926 iproc = a->grid->myrow; 927 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 928 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 929 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 930 } 931 if (cols) { 932 n = a->locc; 933 nb = a->nb; 934 isrc = a->csrc; 935 nproc = a->grid->npcol; 936 iproc = a->grid->mycol; 937 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 938 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 939 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 940 } 941 PetscFunctionReturn(0); 942 } 943 944 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 945 { 946 PetscErrorCode ierr; 947 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 948 Mat Bmpi; 949 MPI_Comm comm; 950 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 951 const PetscInt *ranges,*branges,*cwork; 952 const PetscScalar *vwork; 953 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 954 PetscScalar *barray; 955 PetscBool differ=PETSC_FALSE; 956 PetscMPIInt size; 957 958 PetscFunctionBegin; 959 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 960 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 961 962 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 964 ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr); 965 for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 966 } 967 968 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 969 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 970 m = PETSC_DECIDE; 971 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 972 n = PETSC_DECIDE; 973 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 974 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 975 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 976 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 977 978 /* create ScaLAPACK descriptor for B (1d block distribution) */ 979 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 980 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 981 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 982 PetscCheckScaLapackInfo("descinit",info); 983 984 /* redistribute matrix */ 985 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 986 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 987 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 988 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 989 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 991 /* transfer rows of auxiliary matrix to the final matrix B */ 992 ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr); 993 for (i=rstart;i<rend;i++) { 994 ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 995 ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 996 ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 997 } 998 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1000 ierr = MatDestroy(&Bmpi);CHKERRQ(ierr); 1001 1002 } else { /* normal cases */ 1003 1004 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1005 else { 1006 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 1007 m = PETSC_DECIDE; 1008 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1009 n = PETSC_DECIDE; 1010 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1011 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1012 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 1013 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1014 } 1015 1016 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1017 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 1018 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 1019 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 1020 PetscCheckScaLapackInfo("descinit",info); 1021 1022 /* redistribute matrix */ 1023 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 1024 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 1025 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 1026 1027 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1028 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1029 if (reuse == MAT_INPLACE_MATRIX) { 1030 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1031 } else *B = Bmpi; 1032 } 1033 PetscFunctionReturn(0); 1034 } 1035 1036 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1037 { 1038 PetscErrorCode ierr; 1039 Mat_ScaLAPACK *b; 1040 Mat Bmpi; 1041 MPI_Comm comm; 1042 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1043 const PetscInt *ranges; 1044 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1045 PetscScalar *aarray; 1046 PetscInt lda; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1050 1051 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1052 else { 1053 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 1054 m = PETSC_DECIDE; 1055 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1056 n = PETSC_DECIDE; 1057 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1058 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1059 ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 1060 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1061 } 1062 b = (Mat_ScaLAPACK*)Bmpi->data; 1063 1064 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1065 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1066 ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1067 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1068 lld = PetscMax(lda,1); /* local leading dimension */ 1069 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1070 PetscCheckScaLapackInfo("descinit",info); 1071 1072 /* redistribute matrix */ 1073 ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1074 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1075 ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1076 1077 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1078 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1079 if (reuse == MAT_INPLACE_MATRIX) { 1080 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1081 } else *B = Bmpi; 1082 PetscFunctionReturn(0); 1083 } 1084 1085 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1086 { 1087 Mat mat_scal; 1088 PetscErrorCode ierr; 1089 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1090 const PetscInt *cols; 1091 const PetscScalar *vals; 1092 1093 PetscFunctionBegin; 1094 if (reuse == MAT_REUSE_MATRIX) { 1095 mat_scal = *newmat; 1096 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1097 } else { 1098 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1099 m = PETSC_DECIDE; 1100 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1101 n = PETSC_DECIDE; 1102 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1103 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1104 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1105 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1106 } 1107 for (row=rstart;row<rend;row++) { 1108 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1109 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1110 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1111 } 1112 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1113 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 1115 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1116 else *newmat = mat_scal; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1121 { 1122 Mat mat_scal; 1123 PetscErrorCode ierr; 1124 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1125 const PetscInt *cols; 1126 const PetscScalar *vals; 1127 PetscScalar v; 1128 1129 PetscFunctionBegin; 1130 if (reuse == MAT_REUSE_MATRIX) { 1131 mat_scal = *newmat; 1132 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1133 } else { 1134 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1135 m = PETSC_DECIDE; 1136 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1137 n = PETSC_DECIDE; 1138 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1139 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1140 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1141 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1142 } 1143 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1144 for (row=rstart;row<rend;row++) { 1145 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1146 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1147 for (j=0;j<ncols;j++) { /* lower triangular part */ 1148 if (cols[j] == row) continue; 1149 v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1150 ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1151 } 1152 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1153 } 1154 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1155 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1156 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1157 1158 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1159 else *newmat = mat_scal; 1160 PetscFunctionReturn(0); 1161 } 1162 1163 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1164 { 1165 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1166 PetscErrorCode ierr; 1167 PetscInt sz=0; 1168 1169 PetscFunctionBegin; 1170 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1171 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1172 if (!a->lld) a->lld = a->locr; 1173 1174 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1175 ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1176 ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1177 ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1178 1179 A->preallocated = PETSC_TRUE; 1180 PetscFunctionReturn(0); 1181 } 1182 1183 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1184 { 1185 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1186 PetscErrorCode ierr; 1187 Mat_ScaLAPACK_Grid *grid; 1188 PetscBool flg; 1189 MPI_Comm icomm; 1190 1191 PetscFunctionBegin; 1192 ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1193 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1194 ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1195 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1196 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1197 if (--grid->grid_refct == 0) { 1198 Cblacs_gridexit(grid->ictxt); 1199 Cblacs_gridexit(grid->ictxrow); 1200 Cblacs_gridexit(grid->ictxcol); 1201 ierr = PetscFree(grid);CHKERRQ(ierr); 1202 ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRQ(ierr); 1203 } 1204 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1205 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1206 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1207 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1208 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1209 ierr = PetscFree(A->data);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1214 { 1215 PetscErrorCode ierr; 1216 const PetscInt *ranges; 1217 PetscMPIInt size; 1218 PetscInt i,n; 1219 1220 PetscFunctionBegin; 1221 ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr); 1222 if (size>2) { 1223 ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1224 n = ranges[1]-ranges[0]; 1225 for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1226 if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1227 } 1228 PetscFunctionReturn(0); 1229 } 1230 1231 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1232 { 1233 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1234 PetscErrorCode ierr; 1235 PetscBLASInt info=0; 1236 1237 PetscFunctionBegin; 1238 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1239 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1240 1241 /* check that the layout is as enforced by MatCreateScaLAPACK */ 1242 ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1243 ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1244 1245 /* compute local sizes */ 1246 ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1247 ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1248 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1249 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1250 a->lld = PetscMax(1,a->locr); 1251 1252 /* allocate local array */ 1253 ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1254 1255 /* set up ScaLAPACK descriptor */ 1256 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1257 PetscCheckScaLapackInfo("descinit",info); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1262 { 1263 PetscErrorCode ierr; 1264 PetscInt nstash,reallocs; 1265 1266 PetscFunctionBegin; 1267 if (A->nooffprocentries) PetscFunctionReturn(0); 1268 ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1269 ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1270 ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1275 { 1276 PetscErrorCode ierr; 1277 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1278 PetscMPIInt n; 1279 PetscInt i,flg,*row,*col; 1280 PetscScalar *val; 1281 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1282 1283 PetscFunctionBegin; 1284 if (A->nooffprocentries) PetscFunctionReturn(0); 1285 while (1) { 1286 ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1287 if (!flg) break; 1288 for (i=0;i<n;i++) { 1289 ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1290 ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1291 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1292 if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process"); 1293 switch (A->insertmode) { 1294 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1295 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1296 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1297 } 1298 } 1299 } 1300 ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1305 { 1306 PetscErrorCode ierr; 1307 Mat Adense,As; 1308 MPI_Comm comm; 1309 1310 PetscFunctionBegin; 1311 ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1312 ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1313 ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1314 ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1315 ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1316 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1317 ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* -------------------------------------------------------------------*/ 1322 static struct _MatOps MatOps_Values = { 1323 MatSetValues_ScaLAPACK, 1324 0, 1325 0, 1326 MatMult_ScaLAPACK, 1327 /* 4*/ MatMultAdd_ScaLAPACK, 1328 MatMultTranspose_ScaLAPACK, 1329 MatMultTransposeAdd_ScaLAPACK, 1330 MatSolve_ScaLAPACK, 1331 MatSolveAdd_ScaLAPACK, 1332 0, 1333 /*10*/ 0, 1334 MatLUFactor_ScaLAPACK, 1335 MatCholeskyFactor_ScaLAPACK, 1336 0, 1337 MatTranspose_ScaLAPACK, 1338 /*15*/ MatGetInfo_ScaLAPACK, 1339 0, 1340 MatGetDiagonal_ScaLAPACK, 1341 MatDiagonalScale_ScaLAPACK, 1342 MatNorm_ScaLAPACK, 1343 /*20*/ MatAssemblyBegin_ScaLAPACK, 1344 MatAssemblyEnd_ScaLAPACK, 1345 MatSetOption_ScaLAPACK, 1346 MatZeroEntries_ScaLAPACK, 1347 /*24*/ 0, 1348 MatLUFactorSymbolic_ScaLAPACK, 1349 MatLUFactorNumeric_ScaLAPACK, 1350 MatCholeskyFactorSymbolic_ScaLAPACK, 1351 MatCholeskyFactorNumeric_ScaLAPACK, 1352 /*29*/ MatSetUp_ScaLAPACK, 1353 0, 1354 0, 1355 0, 1356 0, 1357 /*34*/ MatDuplicate_ScaLAPACK, 1358 0, 1359 0, 1360 0, 1361 0, 1362 /*39*/ MatAXPY_ScaLAPACK, 1363 0, 1364 0, 1365 0, 1366 MatCopy_ScaLAPACK, 1367 /*44*/ 0, 1368 MatScale_ScaLAPACK, 1369 MatShift_ScaLAPACK, 1370 0, 1371 0, 1372 /*49*/ 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 /*54*/ 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 /*59*/ 0, 1383 MatDestroy_ScaLAPACK, 1384 MatView_ScaLAPACK, 1385 0, 1386 0, 1387 /*64*/ 0, 1388 0, 1389 0, 1390 0, 1391 0, 1392 /*69*/ 0, 1393 0, 1394 MatConvert_ScaLAPACK_Dense, 1395 0, 1396 0, 1397 /*74*/ 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /*79*/ 0, 1403 0, 1404 0, 1405 0, 1406 MatLoad_ScaLAPACK, 1407 /*84*/ 0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /*89*/ 0, 1413 0, 1414 MatMatMultNumeric_ScaLAPACK, 1415 0, 1416 0, 1417 /*94*/ 0, 1418 0, 1419 0, 1420 MatMatTransposeMultNumeric_ScaLAPACK, 1421 0, 1422 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1423 0, 1424 0, 1425 MatConjugate_ScaLAPACK, 1426 0, 1427 /*104*/0, 1428 0, 1429 0, 1430 0, 1431 0, 1432 /*109*/MatMatSolve_ScaLAPACK, 1433 0, 1434 0, 1435 0, 1436 MatMissingDiagonal_ScaLAPACK, 1437 /*114*/0, 1438 0, 1439 0, 1440 0, 1441 0, 1442 /*119*/0, 1443 MatHermitianTranspose_ScaLAPACK, 1444 0, 1445 0, 1446 0, 1447 /*124*/0, 1448 0, 1449 0, 1450 0, 1451 0, 1452 /*129*/0, 1453 0, 1454 0, 1455 0, 1456 0, 1457 /*134*/0, 1458 0, 1459 0, 1460 0, 1461 0, 1462 0, 1463 /*140*/0, 1464 0, 1465 0, 1466 0, 1467 0, 1468 /*145*/0, 1469 0, 1470 0 1471 }; 1472 1473 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1474 { 1475 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1476 PetscInt size=stash->size,nsends; 1477 PetscErrorCode ierr; 1478 PetscInt count,*sindices,**rindices,i,j,l; 1479 PetscScalar **rvalues,*svalues; 1480 MPI_Comm comm = stash->comm; 1481 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1482 PetscMPIInt *sizes,*nlengths,nreceives; 1483 PetscInt *sp_idx,*sp_idy; 1484 PetscScalar *sp_val; 1485 PetscMatStashSpace space,space_next; 1486 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1487 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1488 1489 PetscFunctionBegin; 1490 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1491 InsertMode addv; 1492 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1493 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1494 mat->insertmode = addv; /* in case this processor had no cache */ 1495 } 1496 1497 bs2 = stash->bs*stash->bs; 1498 1499 /* first count number of contributors to each processor */ 1500 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1501 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1502 1503 i = j = 0; 1504 space = stash->space_head; 1505 while (space) { 1506 space_next = space->next; 1507 for (l=0; l<space->local_used; l++) { 1508 ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1509 ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1510 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1511 j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1512 nlengths[j]++; owner[i] = j; 1513 i++; 1514 } 1515 space = space_next; 1516 } 1517 1518 /* Now check what procs get messages - and compute nsends. */ 1519 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1520 for (i=0, nsends=0; i<size; i++) { 1521 if (nlengths[i]) { 1522 sizes[i] = 1; nsends++; 1523 } 1524 } 1525 1526 {PetscMPIInt *onodes,*olengths; 1527 /* Determine the number of messages to expect, their lengths, from from-ids */ 1528 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1529 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1530 /* since clubbing row,col - lengths are multiplied by 2 */ 1531 for (i=0; i<nreceives; i++) olengths[i] *=2; 1532 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1533 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1534 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1535 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1536 ierr = PetscFree(onodes);CHKERRQ(ierr); 1537 ierr = PetscFree(olengths);CHKERRQ(ierr);} 1538 1539 /* do sends: 1540 1) starts[i] gives the starting index in svalues for stuff going to 1541 the ith processor 1542 */ 1543 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1544 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1545 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1546 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1547 startv[0] = 0; starti[0] = 0; 1548 for (i=1; i<size; i++) { 1549 startv[i] = startv[i-1] + nlengths[i-1]; 1550 starti[i] = starti[i-1] + 2*nlengths[i-1]; 1551 } 1552 1553 i = 0; 1554 space = stash->space_head; 1555 while (space) { 1556 space_next = space->next; 1557 sp_idx = space->idx; 1558 sp_idy = space->idy; 1559 sp_val = space->val; 1560 for (l=0; l<space->local_used; l++) { 1561 j = owner[i]; 1562 if (bs2 == 1) { 1563 svalues[startv[j]] = sp_val[l]; 1564 } else { 1565 PetscInt k; 1566 PetscScalar *buf1,*buf2; 1567 buf1 = svalues+bs2*startv[j]; 1568 buf2 = space->val + bs2*l; 1569 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1570 } 1571 sindices[starti[j]] = sp_idx[l]; 1572 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1573 startv[j]++; 1574 starti[j]++; 1575 i++; 1576 } 1577 space = space_next; 1578 } 1579 startv[0] = 0; 1580 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1581 1582 for (i=0,count=0; i<size; i++) { 1583 if (sizes[i]) { 1584 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 1585 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 1586 } 1587 } 1588 #if defined(PETSC_USE_INFO) 1589 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1590 for (i=0; i<size; i++) { 1591 if (sizes[i]) { 1592 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1593 } 1594 } 1595 #endif 1596 ierr = PetscFree(nlengths);CHKERRQ(ierr); 1597 ierr = PetscFree(owner);CHKERRQ(ierr); 1598 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1599 ierr = PetscFree(sizes);CHKERRQ(ierr); 1600 1601 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1602 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1603 1604 for (i=0; i<nreceives; i++) { 1605 recv_waits[2*i] = recv_waits1[i]; 1606 recv_waits[2*i+1] = recv_waits2[i]; 1607 } 1608 stash->recv_waits = recv_waits; 1609 1610 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1611 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1612 1613 stash->svalues = svalues; 1614 stash->sindices = sindices; 1615 stash->rvalues = rvalues; 1616 stash->rindices = rindices; 1617 stash->send_waits = send_waits; 1618 stash->nsends = nsends; 1619 stash->nrecvs = nreceives; 1620 stash->reproduce_count = 0; 1621 PetscFunctionReturn(0); 1622 } 1623 1624 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1625 { 1626 PetscErrorCode ierr; 1627 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1628 1629 PetscFunctionBegin; 1630 if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1631 if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1632 if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1633 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1634 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 /*@ 1639 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1640 the ScaLAPACK matrix 1641 1642 Logically Collective on A 1643 1644 Input Parameter: 1645 + A - a MATSCALAPACK matrix 1646 . mb - the row block size 1647 - nb - the column block size 1648 1649 Level: intermediate 1650 1651 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1652 @*/ 1653 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1654 { 1655 PetscErrorCode ierr; 1656 1657 PetscFunctionBegin; 1658 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1659 PetscValidLogicalCollectiveInt(A,mb,2); 1660 PetscValidLogicalCollectiveInt(A,nb,3); 1661 ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 1665 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1666 { 1667 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1668 1669 PetscFunctionBegin; 1670 if (mb) *mb = a->mb; 1671 if (nb) *nb = a->nb; 1672 PetscFunctionReturn(0); 1673 } 1674 1675 /*@ 1676 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1677 the ScaLAPACK matrix 1678 1679 Not collective 1680 1681 Input Parameter: 1682 . A - a MATSCALAPACK matrix 1683 1684 Output Parameters: 1685 + mb - the row block size 1686 - nb - the column block size 1687 1688 Level: intermediate 1689 1690 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1691 @*/ 1692 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1693 { 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1698 ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1703 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1704 1705 /*MC 1706 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1707 1708 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1709 1710 Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1711 1712 Options Database Keys: 1713 + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1714 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1715 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1716 1717 Level: beginner 1718 1719 .seealso: MATDENSE, MATELEMENTAL 1720 M*/ 1721 1722 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1723 { 1724 Mat_ScaLAPACK *a; 1725 PetscErrorCode ierr; 1726 PetscBool flg,flg1; 1727 Mat_ScaLAPACK_Grid *grid; 1728 MPI_Comm icomm; 1729 PetscBLASInt nprow,npcol,myrow,mycol; 1730 PetscInt optv1,k=2,array[2]={0,0}; 1731 PetscMPIInt size; 1732 1733 PetscFunctionBegin; 1734 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1735 A->insertmode = NOT_SET_VALUES; 1736 1737 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1738 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1739 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1740 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1741 A->stash.ScatterDestroy = NULL; 1742 1743 ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1744 A->data = (void*)a; 1745 1746 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1747 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1748 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr); 1749 ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr); 1750 } 1751 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1752 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1753 if (!flg) { 1754 ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1755 1756 ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr); 1757 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1758 1759 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1760 ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1761 if (flg1) { 1762 if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1763 grid->nprow = optv1; 1764 } 1765 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1766 1767 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1768 grid->npcol = size/grid->nprow; 1769 ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1770 ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1771 grid->ictxt = Csys2blacs_handle(icomm); 1772 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1773 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1774 grid->grid_refct = 1; 1775 grid->nprow = nprow; 1776 grid->npcol = npcol; 1777 grid->myrow = myrow; 1778 grid->mycol = mycol; 1779 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1780 grid->ictxrow = Csys2blacs_handle(icomm); 1781 Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1782 grid->ictxcol = Csys2blacs_handle(icomm); 1783 Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1784 ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr); 1785 1786 } else grid->grid_refct++; 1787 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1788 a->grid = grid; 1789 a->mb = DEFAULT_BLOCKSIZE; 1790 a->nb = DEFAULT_BLOCKSIZE; 1791 1792 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1793 ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1794 if (flg) { 1795 a->mb = array[0]; 1796 a->nb = (k>1)? array[1]: a->mb; 1797 } 1798 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1799 1800 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1801 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1802 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1803 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 /*@C 1808 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1809 (2D block cyclic distribution). 1810 1811 Collective 1812 1813 Input Parameters: 1814 + comm - MPI communicator 1815 . mb - row block size (or PETSC_DECIDE to have it set) 1816 . nb - column block size (or PETSC_DECIDE to have it set) 1817 . M - number of global rows 1818 . N - number of global columns 1819 . rsrc - coordinate of process that owns the first row of the distributed matrix 1820 - csrc - coordinate of process that owns the first column of the distributed matrix 1821 1822 Output Parameter: 1823 . A - the matrix 1824 1825 Options Database Keys: 1826 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1827 1828 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1829 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1830 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1831 1832 Notes: 1833 If PETSC_DECIDE is used for the block sizes, then an appropriate value 1834 is chosen. 1835 1836 Storage Information: 1837 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1838 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1839 significance and are thus ignored. The block sizes refer to the values 1840 used for the distributed matrix, not the same meaning as in BAIJ. 1841 1842 Level: intermediate 1843 1844 .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1845 @*/ 1846 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1847 { 1848 PetscErrorCode ierr; 1849 Mat_ScaLAPACK *a; 1850 PetscInt m,n; 1851 1852 PetscFunctionBegin; 1853 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1854 ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1855 if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1856 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1857 m = PETSC_DECIDE; 1858 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1859 n = PETSC_DECIDE; 1860 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1861 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1862 a = (Mat_ScaLAPACK*)(*A)->data; 1863 ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1864 ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1869 ierr = MatSetUp(*A);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873