1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 #define DEFAULT_BLOCKSIZE 64 4 5 /* 6 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 7 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 8 */ 9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 10 11 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 12 { 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr); 17 ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr); 18 PetscFunctionReturn(0); 19 } 20 21 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 22 { 23 PetscErrorCode ierr; 24 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 25 PetscBool iascii; 26 PetscViewerFormat format; 27 Mat Adense; 28 29 PetscFunctionBegin; 30 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 31 if (iascii) { 32 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 33 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 34 ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 36 ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 37 ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 38 PetscFunctionReturn(0); 39 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 40 PetscFunctionReturn(0); 41 } 42 } 43 /* convert to dense format and call MatView() */ 44 ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 45 ierr = MatView(Adense,viewer);CHKERRQ(ierr); 46 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 51 { 52 PetscErrorCode ierr; 53 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 54 PetscLogDouble isend[2],irecv[2]; 55 56 PetscFunctionBegin; 57 info->block_size = 1.0; 58 59 isend[0] = a->lld*a->locc; /* locally allocated */ 60 isend[1] = a->locr*a->locc; /* used submatrix */ 61 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 62 info->nz_allocated = isend[0]; 63 info->nz_used = isend[1]; 64 } else if (flag == MAT_GLOBAL_MAX) { 65 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 66 info->nz_allocated = irecv[0]; 67 info->nz_used = irecv[1]; 68 } else if (flag == MAT_GLOBAL_SUM) { 69 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 70 info->nz_allocated = irecv[0]; 71 info->nz_used = irecv[1]; 72 } 73 74 info->nz_unneeded = 0; 75 info->assemblies = A->num_ass; 76 info->mallocs = 0; 77 info->memory = ((PetscObject)A)->mem; 78 info->fill_ratio_given = 0; 79 info->fill_ratio_needed = 0; 80 info->factor_mallocs = 0; 81 PetscFunctionReturn(0); 82 } 83 84 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 85 { 86 PetscFunctionBegin; 87 switch (op) { 88 case MAT_NEW_NONZERO_LOCATIONS: 89 case MAT_NEW_NONZERO_LOCATION_ERR: 90 case MAT_NEW_NONZERO_ALLOCATION_ERR: 91 case MAT_SYMMETRIC: 92 case MAT_SORTED_FULL: 93 case MAT_HERMITIAN: 94 break; 95 default: 96 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 102 { 103 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 104 PetscErrorCode ierr; 105 PetscInt i,j; 106 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 107 108 PetscFunctionBegin; 109 for (i=0;i<nr;i++) { 110 if (rows[i] < 0) continue; 111 ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 112 for (j=0;j<nc;j++) { 113 if (cols[j] < 0) continue; 114 ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 115 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 116 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 117 switch (imode) { 118 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 119 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 120 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 121 } 122 } else { 123 if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 124 A->assembled = PETSC_FALSE; 125 ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 126 } 127 } 128 } 129 PetscFunctionReturn(0); 130 } 131 132 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 133 { 134 PetscErrorCode ierr; 135 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 136 PetscScalar *x2d,*y2d,alpha=1.0; 137 const PetscInt *ranges; 138 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 139 140 PetscFunctionBegin; 141 if (transpose) { 142 143 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 144 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 145 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 146 xlld = PetscMax(1,A->rmap->n); 147 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 148 PetscCheckScaLapackInfo("descinit",info); 149 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 150 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 151 ylld = 1; 152 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 153 PetscCheckScaLapackInfo("descinit",info); 154 155 /* allocate 2d vectors */ 156 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 157 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 158 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 159 xlld = PetscMax(1,lszx); 160 161 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 162 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 163 PetscCheckScaLapackInfo("descinit",info); 164 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 165 PetscCheckScaLapackInfo("descinit",info); 166 167 /* redistribute x as a column of a 2d matrix */ 168 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 169 170 /* redistribute y as a row of a 2d matrix */ 171 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 172 173 /* call PBLAS subroutine */ 174 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 175 176 /* redistribute y from a row of a 2d matrix */ 177 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 178 179 } else { /* non-transpose */ 180 181 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 182 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 183 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 184 xlld = 1; 185 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 186 PetscCheckScaLapackInfo("descinit",info); 187 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 188 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 189 ylld = PetscMax(1,A->rmap->n); 190 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 191 PetscCheckScaLapackInfo("descinit",info); 192 193 /* allocate 2d vectors */ 194 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 195 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 196 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 197 ylld = PetscMax(1,lszy); 198 199 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 200 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 201 PetscCheckScaLapackInfo("descinit",info); 202 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 203 PetscCheckScaLapackInfo("descinit",info); 204 205 /* redistribute x as a row of a 2d matrix */ 206 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 207 208 /* redistribute y as a column of a 2d matrix */ 209 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 210 211 /* call PBLAS subroutine */ 212 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 213 214 /* redistribute y from a column of a 2d matrix */ 215 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 216 217 } 218 ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 223 { 224 PetscErrorCode ierr; 225 const PetscScalar *xarray; 226 PetscScalar *yarray; 227 228 PetscFunctionBegin; 229 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 230 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 231 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 232 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 233 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 238 { 239 PetscErrorCode ierr; 240 const PetscScalar *xarray; 241 PetscScalar *yarray; 242 243 PetscFunctionBegin; 244 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 245 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 246 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 247 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 248 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 253 { 254 PetscErrorCode ierr; 255 const PetscScalar *xarray; 256 PetscScalar *zarray; 257 258 PetscFunctionBegin; 259 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 260 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 262 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 263 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 264 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 269 { 270 PetscErrorCode ierr; 271 const PetscScalar *xarray; 272 PetscScalar *zarray; 273 274 PetscFunctionBegin; 275 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 276 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 277 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 278 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 279 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 280 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 285 { 286 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 287 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 288 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 289 PetscScalar sone=1.0,zero=0.0; 290 PetscBLASInt one=1; 291 292 PetscFunctionBegin; 293 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 294 C->assembled = PETSC_TRUE; 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 304 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 305 ierr = MatSetUp(C);CHKERRQ(ierr); 306 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 307 PetscFunctionReturn(0); 308 } 309 310 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 311 { 312 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 313 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 314 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 315 PetscScalar sone=1.0,zero=0.0; 316 PetscBLASInt one=1; 317 318 PetscFunctionBegin; 319 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 320 C->assembled = PETSC_TRUE; 321 PetscFunctionReturn(0); 322 } 323 324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 325 { 326 PetscErrorCode ierr; 327 328 PetscFunctionBegin; 329 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 330 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 331 ierr = MatSetUp(C);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 /* --------------------------------------- */ 336 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 337 { 338 PetscFunctionBegin; 339 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 340 C->ops->productsymbolic = MatProductSymbolic_AB; 341 PetscFunctionReturn(0); 342 } 343 344 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 345 { 346 PetscFunctionBegin; 347 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 348 C->ops->productsymbolic = MatProductSymbolic_ABt; 349 PetscFunctionReturn(0); 350 } 351 352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 353 { 354 PetscErrorCode ierr; 355 Mat_Product *product = C->product; 356 357 PetscFunctionBegin; 358 switch (product->type) { 359 case MATPRODUCT_AB: 360 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 361 break; 362 case MATPRODUCT_ABt: 363 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 364 break; 365 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 366 } 367 PetscFunctionReturn(0); 368 } 369 /* --------------------------------------- */ 370 371 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 372 { 373 PetscErrorCode ierr; 374 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 375 PetscScalar *darray,*d2d,v; 376 const PetscInt *ranges; 377 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 378 379 PetscFunctionBegin; 380 ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 381 382 if (A->rmap->N<=A->cmap->N) { /* row version */ 383 384 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 385 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 386 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 387 dlld = PetscMax(1,A->rmap->n); 388 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 389 PetscCheckScaLapackInfo("descinit",info); 390 391 /* allocate 2d vector */ 392 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 393 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 394 dlld = PetscMax(1,lszd); 395 396 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 397 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 398 PetscCheckScaLapackInfo("descinit",info); 399 400 /* collect diagonal */ 401 for (j=1;j<=a->M;j++) { 402 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 403 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 404 } 405 406 /* redistribute d from a column of a 2d matrix */ 407 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 408 ierr = PetscFree(d2d);CHKERRQ(ierr); 409 410 } else { /* column version */ 411 412 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 413 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 414 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 415 dlld = 1; 416 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 417 PetscCheckScaLapackInfo("descinit",info); 418 419 /* allocate 2d vector */ 420 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 421 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 422 423 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 424 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 425 PetscCheckScaLapackInfo("descinit",info); 426 427 /* collect diagonal */ 428 for (j=1;j<=a->N;j++) { 429 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 430 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 431 } 432 433 /* redistribute d from a row of a 2d matrix */ 434 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 435 ierr = PetscFree(d2d);CHKERRQ(ierr); 436 } 437 438 ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 439 ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 445 { 446 PetscErrorCode ierr; 447 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 448 const PetscScalar *d; 449 const PetscInt *ranges; 450 PetscScalar *d2d; 451 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 452 453 PetscFunctionBegin; 454 if (R) { 455 ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 456 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 457 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 458 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 459 dlld = 1; 460 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 461 PetscCheckScaLapackInfo("descinit",info); 462 463 /* allocate 2d vector */ 464 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 465 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 466 467 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 468 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 469 PetscCheckScaLapackInfo("descinit",info); 470 471 /* redistribute d to a row of a 2d matrix */ 472 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 473 474 /* broadcast along process columns */ 475 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 476 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 477 478 /* local scaling */ 479 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 480 481 ierr = PetscFree(d2d);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 483 } 484 if (L) { 485 ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 486 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 487 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 488 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 489 dlld = PetscMax(1,A->rmap->n); 490 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 491 PetscCheckScaLapackInfo("descinit",info); 492 493 /* allocate 2d vector */ 494 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 495 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 496 dlld = PetscMax(1,lszd); 497 498 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 499 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 500 PetscCheckScaLapackInfo("descinit",info); 501 502 /* redistribute d to a column of a 2d matrix */ 503 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 504 505 /* broadcast along process rows */ 506 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 507 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 508 509 /* local scaling */ 510 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 511 512 ierr = PetscFree(d2d);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 514 } 515 PetscFunctionReturn(0); 516 } 517 518 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 519 { 520 PetscFunctionBegin; 521 *missing = PETSC_FALSE; 522 PetscFunctionReturn(0); 523 } 524 525 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 526 { 527 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 528 PetscBLASInt n,one=1; 529 530 PetscFunctionBegin; 531 n = x->lld*x->locc; 532 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 533 PetscFunctionReturn(0); 534 } 535 536 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 537 { 538 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 539 PetscBLASInt i,n; 540 PetscScalar v; 541 542 PetscFunctionBegin; 543 n = PetscMin(x->M,x->N); 544 for (i=1;i<=n;i++) { 545 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 546 v += alpha; 547 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 548 } 549 PetscFunctionReturn(0); 550 } 551 552 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 553 { 554 PetscErrorCode ierr; 555 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 556 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 557 PetscBLASInt one=1; 558 PetscScalar beta=1.0; 559 560 PetscFunctionBegin; 561 MatScaLAPACKCheckDistribution(Y,1,X,3); 562 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 563 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 568 { 569 PetscErrorCode ierr; 570 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 571 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 572 573 PetscFunctionBegin; 574 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 575 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 580 { 581 Mat Bs; 582 MPI_Comm comm; 583 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 584 PetscErrorCode ierr; 585 586 PetscFunctionBegin; 587 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 588 ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 589 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 590 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 591 b = (Mat_ScaLAPACK*)Bs->data; 592 b->M = a->M; 593 b->N = a->N; 594 b->mb = a->mb; 595 b->nb = a->nb; 596 b->rsrc = a->rsrc; 597 b->csrc = a->csrc; 598 ierr = MatSetUp(Bs);CHKERRQ(ierr); 599 *B = Bs; 600 if (op == MAT_COPY_VALUES) { 601 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 602 } 603 Bs->assembled = PETSC_TRUE; 604 PetscFunctionReturn(0); 605 } 606 607 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 608 { 609 PetscErrorCode ierr; 610 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 611 Mat Bs = *B; 612 PetscBLASInt one=1; 613 PetscScalar sone=1.0,zero=0.0; 614 #if defined(PETSC_USE_COMPLEX) 615 PetscInt i,j; 616 #endif 617 618 PetscFunctionBegin; 619 if (reuse == MAT_INITIAL_MATRIX) { 620 ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr); 621 *B = Bs; 622 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 623 b = (Mat_ScaLAPACK*)Bs->data; 624 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 625 #if defined(PETSC_USE_COMPLEX) 626 /* undo conjugation */ 627 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 628 #endif 629 Bs->assembled = PETSC_TRUE; 630 PetscFunctionReturn(0); 631 } 632 633 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 634 { 635 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 636 PetscInt i,j; 637 638 PetscFunctionBegin; 639 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 640 PetscFunctionReturn(0); 641 } 642 643 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 644 { 645 PetscErrorCode ierr; 646 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 647 Mat Bs = *B; 648 PetscBLASInt one=1; 649 PetscScalar sone=1.0,zero=0.0; 650 651 PetscFunctionBegin; 652 if (reuse == MAT_INITIAL_MATRIX) { 653 ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr); 654 *B = Bs; 655 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 656 b = (Mat_ScaLAPACK*)Bs->data; 657 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 658 Bs->assembled = PETSC_TRUE; 659 PetscFunctionReturn(0); 660 } 661 662 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 663 { 664 PetscErrorCode ierr; 665 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 666 PetscScalar *x,*x2d; 667 const PetscInt *ranges; 668 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 669 670 PetscFunctionBegin; 671 ierr = VecCopy(B,X);CHKERRQ(ierr); 672 ierr = VecGetArray(X,&x);CHKERRQ(ierr); 673 674 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 675 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 676 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 677 xlld = PetscMax(1,A->rmap->n); 678 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 679 PetscCheckScaLapackInfo("descinit",info); 680 681 /* allocate 2d vector */ 682 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 683 ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 684 xlld = PetscMax(1,lszx); 685 686 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 687 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 688 PetscCheckScaLapackInfo("descinit",info); 689 690 /* redistribute x as a column of a 2d matrix */ 691 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 692 693 /* call ScaLAPACK subroutine */ 694 switch (A->factortype) { 695 case MAT_FACTOR_LU: 696 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 697 PetscCheckScaLapackInfo("getrs",info); 698 break; 699 case MAT_FACTOR_CHOLESKY: 700 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 701 PetscCheckScaLapackInfo("potrs",info); 702 break; 703 default: 704 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 705 } 706 707 /* redistribute x from a column of a 2d matrix */ 708 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 709 710 ierr = PetscFree(x2d);CHKERRQ(ierr); 711 ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 716 { 717 PetscErrorCode ierr; 718 719 PetscFunctionBegin; 720 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 721 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 726 { 727 PetscErrorCode ierr; 728 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 729 PetscBool flg1,flg2; 730 PetscBLASInt one=1,info; 731 732 PetscFunctionBegin; 733 ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 734 ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 735 if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 736 MatScaLAPACKCheckDistribution(B,1,X,2); 737 b = (Mat_ScaLAPACK*)B->data; 738 x = (Mat_ScaLAPACK*)X->data; 739 ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 740 741 switch (A->factortype) { 742 case MAT_FACTOR_LU: 743 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 744 PetscCheckScaLapackInfo("getrs",info); 745 break; 746 case MAT_FACTOR_CHOLESKY: 747 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 748 PetscCheckScaLapackInfo("potrs",info); 749 break; 750 default: 751 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 757 { 758 PetscErrorCode ierr; 759 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 760 PetscBLASInt one=1,info; 761 762 PetscFunctionBegin; 763 if (!a->pivots) { 764 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 765 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 766 } 767 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 768 PetscCheckScaLapackInfo("getrf",info); 769 A->factortype = MAT_FACTOR_LU; 770 A->assembled = PETSC_TRUE; 771 772 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 773 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 778 { 779 PetscErrorCode ierr; 780 781 PetscFunctionBegin; 782 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 783 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 788 { 789 PetscFunctionBegin; 790 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 791 PetscFunctionReturn(0); 792 } 793 794 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 795 { 796 PetscErrorCode ierr; 797 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 798 PetscBLASInt one=1,info; 799 800 PetscFunctionBegin; 801 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 802 PetscCheckScaLapackInfo("potrf",info); 803 A->factortype = MAT_FACTOR_CHOLESKY; 804 A->assembled = PETSC_TRUE; 805 806 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 807 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 808 PetscFunctionReturn(0); 809 } 810 811 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 812 { 813 PetscErrorCode ierr; 814 815 PetscFunctionBegin; 816 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 817 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 822 { 823 PetscFunctionBegin; 824 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 829 { 830 PetscFunctionBegin; 831 *type = MATSOLVERSCALAPACK; 832 PetscFunctionReturn(0); 833 } 834 835 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 836 { 837 Mat B; 838 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 /* Create the factorization matrix */ 843 ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr); 844 B->trivialsymbolic = PETSC_TRUE; 845 B->factortype = ftype; 846 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 847 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 848 849 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 850 *F = B; 851 PetscFunctionReturn(0); 852 } 853 854 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 855 { 856 PetscErrorCode ierr; 857 858 PetscFunctionBegin; 859 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 860 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 865 { 866 PetscErrorCode ierr; 867 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 868 PetscBLASInt one=1,lwork=0; 869 const char *ntype; 870 PetscScalar *work=NULL,dummy; 871 872 PetscFunctionBegin; 873 switch (type){ 874 case NORM_1: 875 ntype = "1"; 876 lwork = PetscMax(a->locr,a->locc); 877 break; 878 case NORM_FROBENIUS: 879 ntype = "F"; 880 work = &dummy; 881 break; 882 case NORM_INFINITY: 883 ntype = "I"; 884 lwork = PetscMax(a->locr,a->locc); 885 break; 886 default: 887 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 888 } 889 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 890 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 891 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 892 PetscFunctionReturn(0); 893 } 894 895 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 896 { 897 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 906 { 907 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 908 PetscErrorCode ierr; 909 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 910 911 PetscFunctionBegin; 912 if (rows) { 913 n = a->locr; 914 nb = a->mb; 915 isrc = a->rsrc; 916 nproc = a->grid->nprow; 917 iproc = a->grid->myrow; 918 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 919 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 920 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 921 } 922 if (cols) { 923 n = a->locc; 924 nb = a->nb; 925 isrc = a->csrc; 926 nproc = a->grid->npcol; 927 iproc = a->grid->mycol; 928 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 929 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 930 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 931 } 932 PetscFunctionReturn(0); 933 } 934 935 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 936 { 937 PetscErrorCode ierr; 938 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 939 Mat Bmpi; 940 MPI_Comm comm; 941 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 942 const PetscInt *ranges,*branges,*cwork; 943 const PetscScalar *vwork; 944 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 945 PetscScalar *barray; 946 PetscBool differ=PETSC_FALSE; 947 PetscMPIInt size; 948 949 PetscFunctionBegin; 950 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 951 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 952 953 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 954 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 955 ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr); 956 for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 957 } 958 959 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 960 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 961 m = PETSC_DECIDE; 962 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 963 n = PETSC_DECIDE; 964 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 965 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 966 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 967 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 968 969 /* create ScaLAPACK descriptor for B (1d block distribution) */ 970 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 971 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 972 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 973 PetscCheckScaLapackInfo("descinit",info); 974 975 /* redistribute matrix */ 976 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 977 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 978 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 982 /* transfer rows of auxiliary matrix to the final matrix B */ 983 ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr); 984 for (i=rstart;i<rend;i++) { 985 ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 986 ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 987 ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 988 } 989 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatDestroy(&Bmpi);CHKERRQ(ierr); 992 993 } else { /* normal cases */ 994 995 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 996 else { 997 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 998 m = PETSC_DECIDE; 999 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1000 n = PETSC_DECIDE; 1001 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1002 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1003 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 1004 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1005 } 1006 1007 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1008 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 1009 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 1010 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 1011 PetscCheckScaLapackInfo("descinit",info); 1012 1013 /* redistribute matrix */ 1014 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 1015 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 1016 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 1017 1018 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1019 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1020 if (reuse == MAT_INPLACE_MATRIX) { 1021 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1022 } else *B = Bmpi; 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1028 { 1029 PetscErrorCode ierr; 1030 Mat_ScaLAPACK *b; 1031 Mat Bmpi; 1032 MPI_Comm comm; 1033 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1034 const PetscInt *ranges; 1035 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1036 PetscScalar *aarray; 1037 PetscInt lda; 1038 1039 PetscFunctionBegin; 1040 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1041 1042 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1043 else { 1044 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 1045 m = PETSC_DECIDE; 1046 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1047 n = PETSC_DECIDE; 1048 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1049 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1050 ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 1051 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1052 } 1053 b = (Mat_ScaLAPACK*)Bmpi->data; 1054 1055 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1056 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1057 ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1058 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1059 lld = PetscMax(lda,1); /* local leading dimension */ 1060 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1061 PetscCheckScaLapackInfo("descinit",info); 1062 1063 /* redistribute matrix */ 1064 ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1065 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1066 ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1067 1068 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1069 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1070 if (reuse == MAT_INPLACE_MATRIX) { 1071 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1072 } else *B = Bmpi; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1077 { 1078 Mat mat_scal; 1079 PetscErrorCode ierr; 1080 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1081 const PetscInt *cols; 1082 const PetscScalar *vals; 1083 1084 PetscFunctionBegin; 1085 if (reuse == MAT_REUSE_MATRIX) { 1086 mat_scal = *newmat; 1087 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1088 } else { 1089 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1090 m = PETSC_DECIDE; 1091 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1092 n = PETSC_DECIDE; 1093 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1094 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1095 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1096 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1097 } 1098 for (row=rstart;row<rend;row++) { 1099 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1100 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1101 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1102 } 1103 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1104 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1105 1106 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1107 else *newmat = mat_scal; 1108 PetscFunctionReturn(0); 1109 } 1110 1111 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1112 { 1113 Mat mat_scal; 1114 PetscErrorCode ierr; 1115 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1116 const PetscInt *cols; 1117 const PetscScalar *vals; 1118 PetscScalar v; 1119 1120 PetscFunctionBegin; 1121 if (reuse == MAT_REUSE_MATRIX) { 1122 mat_scal = *newmat; 1123 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1124 } else { 1125 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1126 m = PETSC_DECIDE; 1127 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1128 n = PETSC_DECIDE; 1129 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1130 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1131 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1132 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1133 } 1134 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1135 for (row=rstart;row<rend;row++) { 1136 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1137 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1138 for (j=0;j<ncols;j++) { /* lower triangular part */ 1139 if (cols[j] == row) continue; 1140 v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1141 ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1142 } 1143 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1144 } 1145 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1146 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1147 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1148 1149 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1150 else *newmat = mat_scal; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1155 { 1156 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1157 PetscErrorCode ierr; 1158 PetscInt sz=0; 1159 1160 PetscFunctionBegin; 1161 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1162 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1163 if (!a->lld) a->lld = a->locr; 1164 1165 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1166 ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1167 ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1168 ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1169 1170 A->preallocated = PETSC_TRUE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1175 { 1176 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1177 PetscErrorCode ierr; 1178 Mat_ScaLAPACK_Grid *grid; 1179 PetscBool flg; 1180 MPI_Comm icomm; 1181 1182 PetscFunctionBegin; 1183 ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1184 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1185 ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1186 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1187 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr); 1188 if (--grid->grid_refct == 0) { 1189 Cblacs_gridexit(grid->ictxt); 1190 Cblacs_gridexit(grid->ictxrow); 1191 Cblacs_gridexit(grid->ictxcol); 1192 ierr = PetscFree(grid);CHKERRQ(ierr); 1193 ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr); 1194 } 1195 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1196 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1197 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1198 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1199 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1200 ierr = PetscFree(A->data);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1205 { 1206 PetscErrorCode ierr; 1207 const PetscInt *ranges; 1208 PetscMPIInt size; 1209 PetscInt i,n; 1210 1211 PetscFunctionBegin; 1212 ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr); 1213 if (size>2) { 1214 ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1215 n = ranges[1]-ranges[0]; 1216 for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1217 if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1223 { 1224 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1225 PetscErrorCode ierr; 1226 PetscBLASInt info=0; 1227 1228 PetscFunctionBegin; 1229 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1230 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1231 1232 /* check that the layout is as enforced by MatCreateScaLAPACK */ 1233 ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1234 ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1235 1236 /* compute local sizes */ 1237 ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1238 ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1239 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1240 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1241 a->lld = PetscMax(1,a->locr); 1242 1243 /* allocate local array */ 1244 ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1245 1246 /* set up ScaLAPACK descriptor */ 1247 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1248 PetscCheckScaLapackInfo("descinit",info); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1253 { 1254 PetscErrorCode ierr; 1255 PetscInt nstash,reallocs; 1256 1257 PetscFunctionBegin; 1258 if (A->nooffprocentries) PetscFunctionReturn(0); 1259 ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1260 ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1261 ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1266 { 1267 PetscErrorCode ierr; 1268 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1269 PetscMPIInt n; 1270 PetscInt i,flg,*row,*col; 1271 PetscScalar *val; 1272 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1273 1274 PetscFunctionBegin; 1275 if (A->nooffprocentries) PetscFunctionReturn(0); 1276 while (1) { 1277 ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1278 if (!flg) break; 1279 for (i=0;i<n;i++) { 1280 ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1281 ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1282 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1283 if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1284 switch (A->insertmode) { 1285 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1286 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1287 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1288 } 1289 } 1290 } 1291 ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1296 { 1297 PetscErrorCode ierr; 1298 Mat Adense,As; 1299 MPI_Comm comm; 1300 1301 PetscFunctionBegin; 1302 ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1303 ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1304 ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1305 ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1306 ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1307 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1308 ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 /* -------------------------------------------------------------------*/ 1313 static struct _MatOps MatOps_Values = { 1314 MatSetValues_ScaLAPACK, 1315 0, 1316 0, 1317 MatMult_ScaLAPACK, 1318 /* 4*/ MatMultAdd_ScaLAPACK, 1319 MatMultTranspose_ScaLAPACK, 1320 MatMultTransposeAdd_ScaLAPACK, 1321 MatSolve_ScaLAPACK, 1322 MatSolveAdd_ScaLAPACK, 1323 0, 1324 /*10*/ 0, 1325 MatLUFactor_ScaLAPACK, 1326 MatCholeskyFactor_ScaLAPACK, 1327 0, 1328 MatTranspose_ScaLAPACK, 1329 /*15*/ MatGetInfo_ScaLAPACK, 1330 0, 1331 MatGetDiagonal_ScaLAPACK, 1332 MatDiagonalScale_ScaLAPACK, 1333 MatNorm_ScaLAPACK, 1334 /*20*/ MatAssemblyBegin_ScaLAPACK, 1335 MatAssemblyEnd_ScaLAPACK, 1336 MatSetOption_ScaLAPACK, 1337 MatZeroEntries_ScaLAPACK, 1338 /*24*/ 0, 1339 MatLUFactorSymbolic_ScaLAPACK, 1340 MatLUFactorNumeric_ScaLAPACK, 1341 MatCholeskyFactorSymbolic_ScaLAPACK, 1342 MatCholeskyFactorNumeric_ScaLAPACK, 1343 /*29*/ MatSetUp_ScaLAPACK, 1344 0, 1345 0, 1346 0, 1347 0, 1348 /*34*/ MatDuplicate_ScaLAPACK, 1349 0, 1350 0, 1351 0, 1352 0, 1353 /*39*/ MatAXPY_ScaLAPACK, 1354 0, 1355 0, 1356 0, 1357 MatCopy_ScaLAPACK, 1358 /*44*/ 0, 1359 MatScale_ScaLAPACK, 1360 MatShift_ScaLAPACK, 1361 0, 1362 0, 1363 /*49*/ 0, 1364 0, 1365 0, 1366 0, 1367 0, 1368 /*54*/ 0, 1369 0, 1370 0, 1371 0, 1372 0, 1373 /*59*/ 0, 1374 MatDestroy_ScaLAPACK, 1375 MatView_ScaLAPACK, 1376 0, 1377 0, 1378 /*64*/ 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 /*69*/ 0, 1384 0, 1385 MatConvert_ScaLAPACK_Dense, 1386 0, 1387 0, 1388 /*74*/ 0, 1389 0, 1390 0, 1391 0, 1392 0, 1393 /*79*/ 0, 1394 0, 1395 0, 1396 0, 1397 MatLoad_ScaLAPACK, 1398 /*84*/ 0, 1399 0, 1400 0, 1401 0, 1402 0, 1403 /*89*/ 0, 1404 0, 1405 MatMatMultNumeric_ScaLAPACK, 1406 0, 1407 0, 1408 /*94*/ 0, 1409 0, 1410 0, 1411 MatMatTransposeMultNumeric_ScaLAPACK, 1412 0, 1413 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1414 0, 1415 0, 1416 MatConjugate_ScaLAPACK, 1417 0, 1418 /*104*/0, 1419 0, 1420 0, 1421 0, 1422 0, 1423 /*109*/MatMatSolve_ScaLAPACK, 1424 0, 1425 0, 1426 0, 1427 MatMissingDiagonal_ScaLAPACK, 1428 /*114*/0, 1429 0, 1430 0, 1431 0, 1432 0, 1433 /*119*/0, 1434 MatHermitianTranspose_ScaLAPACK, 1435 0, 1436 0, 1437 0, 1438 /*124*/0, 1439 0, 1440 0, 1441 0, 1442 0, 1443 /*129*/0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 /*134*/0, 1449 0, 1450 0, 1451 0, 1452 0, 1453 0, 1454 /*140*/0, 1455 0, 1456 0, 1457 0, 1458 0, 1459 /*145*/0, 1460 0, 1461 0 1462 }; 1463 1464 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1465 { 1466 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1467 PetscInt size=stash->size,nsends; 1468 PetscErrorCode ierr; 1469 PetscInt count,*sindices,**rindices,i,j,l; 1470 PetscScalar **rvalues,*svalues; 1471 MPI_Comm comm = stash->comm; 1472 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1473 PetscMPIInt *sizes,*nlengths,nreceives; 1474 PetscInt *sp_idx,*sp_idy; 1475 PetscScalar *sp_val; 1476 PetscMatStashSpace space,space_next; 1477 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1478 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1479 1480 PetscFunctionBegin; 1481 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1482 InsertMode addv; 1483 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1484 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1485 mat->insertmode = addv; /* in case this processor had no cache */ 1486 } 1487 1488 bs2 = stash->bs*stash->bs; 1489 1490 /* first count number of contributors to each processor */ 1491 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1492 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1493 1494 i = j = 0; 1495 space = stash->space_head; 1496 while (space) { 1497 space_next = space->next; 1498 for (l=0; l<space->local_used; l++) { 1499 ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1500 ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1501 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1502 j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1503 nlengths[j]++; owner[i] = j; 1504 i++; 1505 } 1506 space = space_next; 1507 } 1508 1509 /* Now check what procs get messages - and compute nsends. */ 1510 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1511 for (i=0, nsends=0; i<size; i++) { 1512 if (nlengths[i]) { 1513 sizes[i] = 1; nsends++; 1514 } 1515 } 1516 1517 {PetscMPIInt *onodes,*olengths; 1518 /* Determine the number of messages to expect, their lengths, from from-ids */ 1519 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1520 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1521 /* since clubbing row,col - lengths are multiplied by 2 */ 1522 for (i=0; i<nreceives; i++) olengths[i] *=2; 1523 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1524 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1525 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1526 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1527 ierr = PetscFree(onodes);CHKERRQ(ierr); 1528 ierr = PetscFree(olengths);CHKERRQ(ierr);} 1529 1530 /* do sends: 1531 1) starts[i] gives the starting index in svalues for stuff going to 1532 the ith processor 1533 */ 1534 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1535 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1536 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1537 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1538 startv[0] = 0; starti[0] = 0; 1539 for (i=1; i<size; i++) { 1540 startv[i] = startv[i-1] + nlengths[i-1]; 1541 starti[i] = starti[i-1] + 2*nlengths[i-1]; 1542 } 1543 1544 i = 0; 1545 space = stash->space_head; 1546 while (space) { 1547 space_next = space->next; 1548 sp_idx = space->idx; 1549 sp_idy = space->idy; 1550 sp_val = space->val; 1551 for (l=0; l<space->local_used; l++) { 1552 j = owner[i]; 1553 if (bs2 == 1) { 1554 svalues[startv[j]] = sp_val[l]; 1555 } else { 1556 PetscInt k; 1557 PetscScalar *buf1,*buf2; 1558 buf1 = svalues+bs2*startv[j]; 1559 buf2 = space->val + bs2*l; 1560 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1561 } 1562 sindices[starti[j]] = sp_idx[l]; 1563 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1564 startv[j]++; 1565 starti[j]++; 1566 i++; 1567 } 1568 space = space_next; 1569 } 1570 startv[0] = 0; 1571 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1572 1573 for (i=0,count=0; i<size; i++) { 1574 if (sizes[i]) { 1575 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr); 1576 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr); 1577 } 1578 } 1579 #if defined(PETSC_USE_INFO) 1580 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1581 for (i=0; i<size; i++) { 1582 if (sizes[i]) { 1583 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1584 } 1585 } 1586 #endif 1587 ierr = PetscFree(nlengths);CHKERRQ(ierr); 1588 ierr = PetscFree(owner);CHKERRQ(ierr); 1589 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1590 ierr = PetscFree(sizes);CHKERRQ(ierr); 1591 1592 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1593 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1594 1595 for (i=0; i<nreceives; i++) { 1596 recv_waits[2*i] = recv_waits1[i]; 1597 recv_waits[2*i+1] = recv_waits2[i]; 1598 } 1599 stash->recv_waits = recv_waits; 1600 1601 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1602 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1603 1604 stash->svalues = svalues; 1605 stash->sindices = sindices; 1606 stash->rvalues = rvalues; 1607 stash->rindices = rindices; 1608 stash->send_waits = send_waits; 1609 stash->nsends = nsends; 1610 stash->nrecvs = nreceives; 1611 stash->reproduce_count = 0; 1612 PetscFunctionReturn(0); 1613 } 1614 1615 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1616 { 1617 PetscErrorCode ierr; 1618 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1619 1620 PetscFunctionBegin; 1621 if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1622 if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1623 if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1624 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1625 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 /*@ 1630 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1631 the ScaLAPACK matrix 1632 1633 Logically Collective on A 1634 1635 Input Parameter: 1636 + A - a MATSCALAPACK matrix 1637 . mb - the row block size 1638 - nb - the column block size 1639 1640 Level: intermediate 1641 1642 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1643 @*/ 1644 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1645 { 1646 PetscErrorCode ierr; 1647 1648 PetscFunctionBegin; 1649 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1650 PetscValidLogicalCollectiveInt(A,mb,2); 1651 PetscValidLogicalCollectiveInt(A,nb,3); 1652 ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1657 { 1658 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1659 1660 PetscFunctionBegin; 1661 if (mb) *mb = a->mb; 1662 if (nb) *nb = a->nb; 1663 PetscFunctionReturn(0); 1664 } 1665 1666 /*@ 1667 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1668 the ScaLAPACK matrix 1669 1670 Not collective 1671 1672 Input Parameter: 1673 . A - a MATSCALAPACK matrix 1674 1675 Output Parameters: 1676 + mb - the row block size 1677 - nb - the column block size 1678 1679 Level: intermediate 1680 1681 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1682 @*/ 1683 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1684 { 1685 PetscErrorCode ierr; 1686 1687 PetscFunctionBegin; 1688 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1689 ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 1693 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1694 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1695 1696 /*MC 1697 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1698 1699 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1700 1701 Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1702 1703 Options Database Keys: 1704 + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1705 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1706 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1707 1708 Level: beginner 1709 1710 .seealso: MATDENSE, MATELEMENTAL 1711 M*/ 1712 1713 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1714 { 1715 Mat_ScaLAPACK *a; 1716 PetscErrorCode ierr; 1717 PetscBool flg,flg1; 1718 Mat_ScaLAPACK_Grid *grid; 1719 MPI_Comm icomm; 1720 PetscBLASInt nprow,npcol,myrow,mycol; 1721 PetscInt optv1,k=2,array[2]={0,0}; 1722 PetscMPIInt size; 1723 1724 PetscFunctionBegin; 1725 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1726 A->insertmode = NOT_SET_VALUES; 1727 1728 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1729 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1730 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1731 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1732 A->stash.ScatterDestroy = NULL; 1733 1734 ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1735 A->data = (void*)a; 1736 1737 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1738 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1739 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr); 1740 ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr); 1741 } 1742 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1743 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr); 1744 if (!flg) { 1745 ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1746 1747 ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr); 1748 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1749 1750 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1751 ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1752 if (flg1) { 1753 if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1754 grid->nprow = optv1; 1755 } 1756 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1757 1758 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1759 grid->npcol = size/grid->nprow; 1760 ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1761 ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1762 grid->ictxt = Csys2blacs_handle(icomm); 1763 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1764 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1765 grid->grid_refct = 1; 1766 grid->nprow = nprow; 1767 grid->npcol = npcol; 1768 grid->myrow = myrow; 1769 grid->mycol = mycol; 1770 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1771 grid->ictxrow = Csys2blacs_handle(icomm); 1772 Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1773 grid->ictxcol = Csys2blacs_handle(icomm); 1774 Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1775 ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr); 1776 1777 } else grid->grid_refct++; 1778 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1779 a->grid = grid; 1780 a->mb = DEFAULT_BLOCKSIZE; 1781 a->nb = DEFAULT_BLOCKSIZE; 1782 1783 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1784 ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1785 if (flg) { 1786 a->mb = array[0]; 1787 a->nb = (k>1)? array[1]: a->mb; 1788 } 1789 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1790 1791 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1792 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1793 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1794 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 /*@C 1799 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1800 (2D block cyclic distribution). 1801 1802 Collective 1803 1804 Input Parameters: 1805 + comm - MPI communicator 1806 . mb - row block size (or PETSC_DECIDE to have it set) 1807 . nb - column block size (or PETSC_DECIDE to have it set) 1808 . M - number of global rows 1809 . N - number of global columns 1810 . rsrc - coordinate of process that owns the first row of the distributed matrix 1811 - csrc - coordinate of process that owns the first column of the distributed matrix 1812 1813 Output Parameter: 1814 . A - the matrix 1815 1816 Options Database Keys: 1817 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1818 1819 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1820 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1821 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1822 1823 Notes: 1824 If PETSC_DECIDE is used for the block sizes, then an appropriate value 1825 is chosen. 1826 1827 Storage Information: 1828 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1829 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1830 significance and are thus ignored. The block sizes refer to the values 1831 used for the distributed matrix, not the same meaning as in BAIJ. 1832 1833 Level: intermediate 1834 1835 .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1836 @*/ 1837 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1838 { 1839 PetscErrorCode ierr; 1840 Mat_ScaLAPACK *a; 1841 PetscInt m,n; 1842 1843 PetscFunctionBegin; 1844 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1845 ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1846 if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1847 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1848 m = PETSC_DECIDE; 1849 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1850 n = PETSC_DECIDE; 1851 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1852 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1853 a = (Mat_ScaLAPACK*)(*A)->data; 1854 ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1856 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1857 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1858 ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1859 ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1860 ierr = MatSetUp(*A);CHKERRQ(ierr); 1861 PetscFunctionReturn(0); 1862 } 1863 1864