1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 #define DEFAULT_BLOCKSIZE 64 4 5 /* 6 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 7 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 8 */ 9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 10 11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 12 { 13 PetscErrorCode ierr; 14 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 15 PetscBool iascii; 16 PetscViewerFormat format; 17 Mat Adense; 18 19 PetscFunctionBegin; 20 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 21 if (iascii) { 22 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 23 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 24 ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 25 ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 26 ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 27 ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 28 PetscFunctionReturn(0); 29 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 30 PetscFunctionReturn(0); 31 } 32 } 33 /* convert to dense format and call MatView() */ 34 ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 35 ierr = MatView(Adense,viewer);CHKERRQ(ierr); 36 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 37 PetscFunctionReturn(0); 38 } 39 40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 41 { 42 PetscErrorCode ierr; 43 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 44 PetscLogDouble isend[2],irecv[2]; 45 46 PetscFunctionBegin; 47 info->block_size = 1.0; 48 49 isend[0] = a->lld*a->locc; /* locally allocated */ 50 isend[1] = a->locr*a->locc; /* used submatrix */ 51 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 52 info->nz_allocated = isend[0]; 53 info->nz_used = isend[1]; 54 } else if (flag == MAT_GLOBAL_MAX) { 55 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 56 info->nz_allocated = irecv[0]; 57 info->nz_used = irecv[1]; 58 } else if (flag == MAT_GLOBAL_SUM) { 59 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 60 info->nz_allocated = irecv[0]; 61 info->nz_used = irecv[1]; 62 } 63 64 info->nz_unneeded = 0; 65 info->assemblies = A->num_ass; 66 info->mallocs = 0; 67 info->memory = ((PetscObject)A)->mem; 68 info->fill_ratio_given = 0; 69 info->fill_ratio_needed = 0; 70 info->factor_mallocs = 0; 71 PetscFunctionReturn(0); 72 } 73 74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 75 { 76 PetscFunctionBegin; 77 switch (op) { 78 case MAT_NEW_NONZERO_LOCATIONS: 79 case MAT_NEW_NONZERO_LOCATION_ERR: 80 case MAT_NEW_NONZERO_ALLOCATION_ERR: 81 case MAT_SYMMETRIC: 82 case MAT_SORTED_FULL: 83 case MAT_HERMITIAN: 84 break; 85 default: 86 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 87 } 88 PetscFunctionReturn(0); 89 } 90 91 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 94 PetscErrorCode ierr; 95 PetscInt i,j; 96 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 97 98 PetscFunctionBegin; 99 for (i=0;i<nr;i++) { 100 if (rows[i] < 0) continue; 101 ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 102 for (j=0;j<nc;j++) { 103 if (cols[j] < 0) continue; 104 ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 105 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 106 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 107 switch (imode) { 108 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 109 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 110 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 111 } 112 } else { 113 if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 114 A->assembled = PETSC_FALSE; 115 ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 116 } 117 } 118 } 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 123 { 124 PetscErrorCode ierr; 125 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 126 PetscScalar *x2d,*y2d,alpha=1.0; 127 const PetscInt *ranges; 128 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 129 130 PetscFunctionBegin; 131 if (transpose) { 132 133 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 134 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 135 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 136 xlld = PetscMax(1,A->rmap->n); 137 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 138 PetscCheckScaLapackInfo("descinit",info); 139 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 140 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 141 ylld = 1; 142 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 143 PetscCheckScaLapackInfo("descinit",info); 144 145 /* allocate 2d vectors */ 146 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 147 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 148 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 149 xlld = PetscMax(1,lszx); 150 151 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 152 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 153 PetscCheckScaLapackInfo("descinit",info); 154 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 155 PetscCheckScaLapackInfo("descinit",info); 156 157 /* redistribute x as a column of a 2d matrix */ 158 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 159 160 /* redistribute y as a row of a 2d matrix */ 161 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 162 163 /* call PBLAS subroutine */ 164 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 165 166 /* redistribute y from a row of a 2d matrix */ 167 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 168 169 } else { /* non-transpose */ 170 171 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 172 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 173 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 174 xlld = 1; 175 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 176 PetscCheckScaLapackInfo("descinit",info); 177 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 178 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 179 ylld = PetscMax(1,A->rmap->n); 180 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 181 PetscCheckScaLapackInfo("descinit",info); 182 183 /* allocate 2d vectors */ 184 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 185 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 186 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 187 ylld = PetscMax(1,lszy); 188 189 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 190 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 191 PetscCheckScaLapackInfo("descinit",info); 192 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 193 PetscCheckScaLapackInfo("descinit",info); 194 195 /* redistribute x as a row of a 2d matrix */ 196 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 197 198 /* redistribute y as a column of a 2d matrix */ 199 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 200 201 /* call PBLAS subroutine */ 202 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 203 204 /* redistribute y from a column of a 2d matrix */ 205 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 206 207 } 208 ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 213 { 214 PetscErrorCode ierr; 215 const PetscScalar *xarray; 216 PetscScalar *yarray; 217 218 PetscFunctionBegin; 219 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 220 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 221 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 222 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 223 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 228 { 229 PetscErrorCode ierr; 230 const PetscScalar *xarray; 231 PetscScalar *yarray; 232 233 PetscFunctionBegin; 234 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 235 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 236 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 238 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 243 { 244 PetscErrorCode ierr; 245 const PetscScalar *xarray; 246 PetscScalar *zarray; 247 248 PetscFunctionBegin; 249 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 250 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 251 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 252 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 253 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 254 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 259 { 260 PetscErrorCode ierr; 261 const PetscScalar *xarray; 262 PetscScalar *zarray; 263 264 PetscFunctionBegin; 265 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 266 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 267 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 268 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 269 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 270 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 275 { 276 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 277 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 278 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 279 PetscScalar sone=1.0,zero=0.0; 280 PetscBLASInt one=1; 281 282 PetscFunctionBegin; 283 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 284 C->assembled = PETSC_TRUE; 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 289 { 290 PetscErrorCode ierr; 291 292 PetscFunctionBegin; 293 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 294 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 295 ierr = MatSetUp(C);CHKERRQ(ierr); 296 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 297 PetscFunctionReturn(0); 298 } 299 300 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 301 { 302 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 303 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 304 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 305 PetscScalar sone=1.0,zero=0.0; 306 PetscBLASInt one=1; 307 308 PetscFunctionBegin; 309 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 310 C->assembled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 315 { 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 320 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 321 ierr = MatSetUp(C);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 /* --------------------------------------- */ 326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 327 { 328 PetscFunctionBegin; 329 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 330 C->ops->productsymbolic = MatProductSymbolic_AB; 331 PetscFunctionReturn(0); 332 } 333 334 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 335 { 336 PetscFunctionBegin; 337 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 338 C->ops->productsymbolic = MatProductSymbolic_ABt; 339 PetscFunctionReturn(0); 340 } 341 342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 343 { 344 PetscErrorCode ierr; 345 Mat_Product *product = C->product; 346 347 PetscFunctionBegin; 348 switch (product->type) { 349 case MATPRODUCT_AB: 350 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 351 break; 352 case MATPRODUCT_ABt: 353 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 354 break; 355 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 356 } 357 PetscFunctionReturn(0); 358 } 359 /* --------------------------------------- */ 360 361 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 362 { 363 PetscErrorCode ierr; 364 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 365 PetscScalar *darray,*d2d,v; 366 const PetscInt *ranges; 367 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 368 369 PetscFunctionBegin; 370 ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 371 372 if (A->rmap->N<=A->cmap->N) { /* row version */ 373 374 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 375 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 376 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 377 dlld = PetscMax(1,A->rmap->n); 378 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 379 PetscCheckScaLapackInfo("descinit",info); 380 381 /* allocate 2d vector */ 382 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 383 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 384 dlld = PetscMax(1,lszd); 385 386 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 387 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 388 PetscCheckScaLapackInfo("descinit",info); 389 390 /* collect diagonal */ 391 for (j=1;j<=a->M;j++) { 392 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 393 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 394 } 395 396 /* redistribute d from a column of a 2d matrix */ 397 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 398 ierr = PetscFree(d2d);CHKERRQ(ierr); 399 400 } else { /* column version */ 401 402 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 403 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 404 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 405 dlld = 1; 406 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 407 PetscCheckScaLapackInfo("descinit",info); 408 409 /* allocate 2d vector */ 410 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 411 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 412 413 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 414 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 415 PetscCheckScaLapackInfo("descinit",info); 416 417 /* collect diagonal */ 418 for (j=1;j<=a->N;j++) { 419 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 420 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 421 } 422 423 /* redistribute d from a row of a 2d matrix */ 424 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 425 ierr = PetscFree(d2d);CHKERRQ(ierr); 426 } 427 428 ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 435 { 436 PetscErrorCode ierr; 437 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 438 const PetscScalar *d; 439 const PetscInt *ranges; 440 PetscScalar *d2d; 441 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 442 443 PetscFunctionBegin; 444 if (R) { 445 ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 446 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 447 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 448 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 449 dlld = 1; 450 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 451 PetscCheckScaLapackInfo("descinit",info); 452 453 /* allocate 2d vector */ 454 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 455 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 456 457 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 458 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 459 PetscCheckScaLapackInfo("descinit",info); 460 461 /* redistribute d to a row of a 2d matrix */ 462 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 463 464 /* broadcast along process columns */ 465 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 466 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 467 468 /* local scaling */ 469 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 470 471 ierr = PetscFree(d2d);CHKERRQ(ierr); 472 ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 473 } 474 if (L) { 475 ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 476 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 477 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 478 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 479 dlld = PetscMax(1,A->rmap->n); 480 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 481 PetscCheckScaLapackInfo("descinit",info); 482 483 /* allocate 2d vector */ 484 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 485 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 486 dlld = PetscMax(1,lszd); 487 488 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 489 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 490 PetscCheckScaLapackInfo("descinit",info); 491 492 /* redistribute d to a column of a 2d matrix */ 493 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 494 495 /* broadcast along process rows */ 496 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 497 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 498 499 /* local scaling */ 500 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 501 502 ierr = PetscFree(d2d);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 504 } 505 PetscFunctionReturn(0); 506 } 507 508 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 509 { 510 PetscFunctionBegin; 511 *missing = PETSC_FALSE; 512 PetscFunctionReturn(0); 513 } 514 515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 516 { 517 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 518 PetscBLASInt n,one=1; 519 520 PetscFunctionBegin; 521 n = x->lld*x->locc; 522 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 523 PetscFunctionReturn(0); 524 } 525 526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 527 { 528 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 529 PetscBLASInt i,n; 530 PetscScalar v; 531 532 PetscFunctionBegin; 533 n = PetscMin(x->M,x->N); 534 for (i=1;i<=n;i++) { 535 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 536 v += alpha; 537 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 538 } 539 PetscFunctionReturn(0); 540 } 541 542 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 543 { 544 PetscErrorCode ierr; 545 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 546 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 547 PetscBLASInt one=1; 548 PetscScalar beta=1.0; 549 550 PetscFunctionBegin; 551 MatScaLAPACKCheckDistribution(Y,1,X,3); 552 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 553 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 558 { 559 PetscErrorCode ierr; 560 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 561 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 562 563 PetscFunctionBegin; 564 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 565 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 570 { 571 Mat Bs; 572 MPI_Comm comm; 573 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 574 PetscErrorCode ierr; 575 576 PetscFunctionBegin; 577 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 578 ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 579 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 580 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 581 b = (Mat_ScaLAPACK*)Bs->data; 582 b->M = a->M; 583 b->N = a->N; 584 b->mb = a->mb; 585 b->nb = a->nb; 586 b->rsrc = a->rsrc; 587 b->csrc = a->csrc; 588 ierr = MatSetUp(Bs);CHKERRQ(ierr); 589 *B = Bs; 590 if (op == MAT_COPY_VALUES) { 591 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 592 } 593 Bs->assembled = PETSC_TRUE; 594 PetscFunctionReturn(0); 595 } 596 597 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 598 { 599 PetscErrorCode ierr; 600 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 601 Mat Bs = *B; 602 PetscBLASInt one=1; 603 PetscScalar sone=1.0,zero=0.0; 604 #if defined(PETSC_USE_COMPLEX) 605 PetscInt i,j; 606 #endif 607 608 PetscFunctionBegin; 609 if (reuse == MAT_INITIAL_MATRIX) { 610 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 611 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 612 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 613 ierr = MatSetUp(Bs);CHKERRQ(ierr); 614 *B = Bs; 615 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 616 b = (Mat_ScaLAPACK*)Bs->data; 617 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 618 #if defined(PETSC_USE_COMPLEX) 619 /* undo conjugation */ 620 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 621 #endif 622 Bs->assembled = PETSC_TRUE; 623 PetscFunctionReturn(0); 624 } 625 626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 627 { 628 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 629 PetscInt i,j; 630 631 PetscFunctionBegin; 632 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 637 { 638 PetscErrorCode ierr; 639 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 640 Mat Bs = *B; 641 PetscBLASInt one=1; 642 PetscScalar sone=1.0,zero=0.0; 643 644 PetscFunctionBegin; 645 if (reuse == MAT_INITIAL_MATRIX) { 646 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 647 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 648 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 649 ierr = MatSetUp(Bs);CHKERRQ(ierr); 650 *B = Bs; 651 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 652 b = (Mat_ScaLAPACK*)Bs->data; 653 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 654 Bs->assembled = PETSC_TRUE; 655 PetscFunctionReturn(0); 656 } 657 658 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 659 { 660 PetscErrorCode ierr; 661 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 662 PetscScalar *x,*x2d; 663 const PetscInt *ranges; 664 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 665 666 PetscFunctionBegin; 667 ierr = VecCopy(B,X);CHKERRQ(ierr); 668 ierr = VecGetArray(X,&x);CHKERRQ(ierr); 669 670 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 671 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 672 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 673 xlld = PetscMax(1,A->rmap->n); 674 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 675 PetscCheckScaLapackInfo("descinit",info); 676 677 /* allocate 2d vector */ 678 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 679 ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 680 xlld = PetscMax(1,lszx); 681 682 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 683 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 684 PetscCheckScaLapackInfo("descinit",info); 685 686 /* redistribute x as a column of a 2d matrix */ 687 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 688 689 /* call ScaLAPACK subroutine */ 690 switch (A->factortype) { 691 case MAT_FACTOR_LU: 692 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 693 PetscCheckScaLapackInfo("getrs",info); 694 break; 695 case MAT_FACTOR_CHOLESKY: 696 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 697 PetscCheckScaLapackInfo("potrs",info); 698 break; 699 default: 700 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 701 } 702 703 /* redistribute x from a column of a 2d matrix */ 704 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 705 706 ierr = PetscFree(x2d);CHKERRQ(ierr); 707 ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 712 { 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 717 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 721 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 722 { 723 PetscErrorCode ierr; 724 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 725 PetscBool flg1,flg2; 726 PetscBLASInt one=1,info; 727 728 PetscFunctionBegin; 729 ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 731 if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 732 MatScaLAPACKCheckDistribution(B,1,X,2); 733 b = (Mat_ScaLAPACK*)B->data; 734 x = (Mat_ScaLAPACK*)X->data; 735 ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 736 737 switch (A->factortype) { 738 case MAT_FACTOR_LU: 739 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 740 PetscCheckScaLapackInfo("getrs",info); 741 break; 742 case MAT_FACTOR_CHOLESKY: 743 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 744 PetscCheckScaLapackInfo("potrs",info); 745 break; 746 default: 747 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 748 } 749 PetscFunctionReturn(0); 750 } 751 752 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 753 { 754 PetscErrorCode ierr; 755 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 756 PetscBLASInt one=1,info; 757 758 PetscFunctionBegin; 759 if (!a->pivots) { 760 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 761 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 762 } 763 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 764 PetscCheckScaLapackInfo("getrf",info); 765 A->factortype = MAT_FACTOR_LU; 766 A->assembled = PETSC_TRUE; 767 768 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 769 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } 772 773 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 774 { 775 PetscErrorCode ierr; 776 777 PetscFunctionBegin; 778 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 779 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 784 { 785 PetscFunctionBegin; 786 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 787 PetscFunctionReturn(0); 788 } 789 790 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 791 { 792 PetscErrorCode ierr; 793 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 794 PetscBLASInt one=1,info; 795 796 PetscFunctionBegin; 797 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 798 PetscCheckScaLapackInfo("potrf",info); 799 A->factortype = MAT_FACTOR_CHOLESKY; 800 A->assembled = PETSC_TRUE; 801 802 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 803 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 804 PetscFunctionReturn(0); 805 } 806 807 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 808 { 809 PetscErrorCode ierr; 810 811 PetscFunctionBegin; 812 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 813 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 818 { 819 PetscFunctionBegin; 820 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 825 { 826 PetscFunctionBegin; 827 *type = MATSOLVERSCALAPACK; 828 PetscFunctionReturn(0); 829 } 830 831 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 832 { 833 Mat B; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 /* Create the factorization matrix */ 838 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 839 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 840 ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr); 841 ierr = MatSetUp(B);CHKERRQ(ierr); 842 B->factortype = ftype; 843 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 844 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 845 846 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 847 *F = B; 848 PetscFunctionReturn(0); 849 } 850 851 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 852 { 853 PetscErrorCode ierr; 854 855 PetscFunctionBegin; 856 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 857 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 862 { 863 PetscErrorCode ierr; 864 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 865 PetscBLASInt one=1,lwork=0; 866 const char *ntype; 867 PetscScalar *work=NULL,dummy; 868 869 PetscFunctionBegin; 870 switch (type){ 871 case NORM_1: 872 ntype = "1"; 873 lwork = PetscMax(a->locr,a->locc); 874 break; 875 case NORM_FROBENIUS: 876 ntype = "F"; 877 work = &dummy; 878 break; 879 case NORM_INFINITY: 880 ntype = "I"; 881 lwork = PetscMax(a->locr,a->locc); 882 break; 883 default: 884 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 885 } 886 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 887 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 888 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 889 PetscFunctionReturn(0); 890 } 891 892 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 893 { 894 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 903 { 904 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 905 PetscErrorCode ierr; 906 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 907 908 PetscFunctionBegin; 909 if (rows) { 910 n = a->locr; 911 nb = a->mb; 912 isrc = a->rsrc; 913 nproc = a->grid->nprow; 914 iproc = a->grid->myrow; 915 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 916 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 917 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 918 } 919 if (cols) { 920 n = a->locc; 921 nb = a->nb; 922 isrc = a->csrc; 923 nproc = a->grid->npcol; 924 iproc = a->grid->mycol; 925 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 926 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 927 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 928 } 929 PetscFunctionReturn(0); 930 } 931 932 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 933 { 934 PetscErrorCode ierr; 935 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 936 Mat Bmpi; 937 MPI_Comm comm; 938 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 939 const PetscInt *ranges,*branges,*cwork; 940 const PetscScalar *vwork; 941 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 942 PetscScalar *barray; 943 PetscBool differ=PETSC_FALSE; 944 PetscMPIInt size; 945 946 PetscFunctionBegin; 947 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 948 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 949 950 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 952 ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr); 953 for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 954 } 955 956 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 957 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 958 m = PETSC_DECIDE; 959 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 960 n = PETSC_DECIDE; 961 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 962 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 963 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 964 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 965 966 /* create ScaLAPACK descriptor for B (1d block distribution) */ 967 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 968 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 969 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 970 PetscCheckScaLapackInfo("descinit",info); 971 972 /* redistribute matrix */ 973 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 974 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 975 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 979 /* transfer rows of auxiliary matrix to the final matrix B */ 980 ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr); 981 for (i=rstart;i<rend;i++) { 982 ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 983 ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 984 ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 985 } 986 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 ierr = MatDestroy(&Bmpi);CHKERRQ(ierr); 989 990 } else { /* normal cases */ 991 992 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 993 else { 994 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 995 m = PETSC_DECIDE; 996 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 997 n = PETSC_DECIDE; 998 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 999 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1000 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 1001 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1002 } 1003 1004 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1005 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 1006 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 1007 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 1008 PetscCheckScaLapackInfo("descinit",info); 1009 1010 /* redistribute matrix */ 1011 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 1012 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 1013 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 1014 1015 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1016 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1017 if (reuse == MAT_INPLACE_MATRIX) { 1018 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1019 } else *B = Bmpi; 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1025 { 1026 PetscErrorCode ierr; 1027 Mat_ScaLAPACK *b; 1028 Mat Bmpi; 1029 MPI_Comm comm; 1030 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1031 const PetscInt *ranges; 1032 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1033 PetscScalar *aarray; 1034 PetscInt lda; 1035 1036 PetscFunctionBegin; 1037 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1038 1039 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1040 else { 1041 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 1042 m = PETSC_DECIDE; 1043 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1044 n = PETSC_DECIDE; 1045 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1046 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1047 ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 1048 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1049 } 1050 b = (Mat_ScaLAPACK*)Bmpi->data; 1051 1052 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1053 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1054 ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1055 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1056 lld = PetscMax(lda,1); /* local leading dimension */ 1057 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1058 PetscCheckScaLapackInfo("descinit",info); 1059 1060 /* redistribute matrix */ 1061 ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1062 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1063 ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1064 1065 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1066 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1067 if (reuse == MAT_INPLACE_MATRIX) { 1068 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1069 } else *B = Bmpi; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1074 { 1075 Mat mat_scal; 1076 PetscErrorCode ierr; 1077 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1078 const PetscInt *cols; 1079 const PetscScalar *vals; 1080 1081 PetscFunctionBegin; 1082 if (reuse == MAT_REUSE_MATRIX) { 1083 mat_scal = *newmat; 1084 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1085 } else { 1086 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1087 m = PETSC_DECIDE; 1088 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1089 n = PETSC_DECIDE; 1090 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1091 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1092 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1093 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1094 } 1095 for (row=rstart;row<rend;row++) { 1096 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1097 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1098 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1099 } 1100 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1101 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1102 1103 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1104 else *newmat = mat_scal; 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1109 { 1110 Mat mat_scal; 1111 PetscErrorCode ierr; 1112 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1113 const PetscInt *cols; 1114 const PetscScalar *vals; 1115 PetscScalar v; 1116 1117 PetscFunctionBegin; 1118 if (reuse == MAT_REUSE_MATRIX) { 1119 mat_scal = *newmat; 1120 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1121 } else { 1122 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1123 m = PETSC_DECIDE; 1124 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1125 n = PETSC_DECIDE; 1126 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1127 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1128 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1129 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1130 } 1131 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1132 for (row=rstart;row<rend;row++) { 1133 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1134 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1135 for (j=0;j<ncols;j++) { /* lower triangular part */ 1136 if (cols[j] == row) continue; 1137 v = A->hermitian ? PetscConj(vals[j]) : vals[j]; 1138 ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1139 } 1140 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1141 } 1142 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1143 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1144 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1145 1146 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1147 else *newmat = mat_scal; 1148 PetscFunctionReturn(0); 1149 } 1150 1151 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1152 { 1153 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1154 PetscErrorCode ierr; 1155 PetscInt sz=0; 1156 1157 PetscFunctionBegin; 1158 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1159 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1160 if (!a->lld) a->lld = a->locr; 1161 1162 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1163 ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1164 ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1165 ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1166 1167 A->preallocated = PETSC_TRUE; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1172 { 1173 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1174 PetscErrorCode ierr; 1175 Mat_ScaLAPACK_Grid *grid; 1176 PetscBool flg; 1177 MPI_Comm icomm; 1178 1179 PetscFunctionBegin; 1180 ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1181 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1182 ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1183 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1184 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1185 if (--grid->grid_refct == 0) { 1186 Cblacs_gridexit(grid->ictxt); 1187 Cblacs_gridexit(grid->ictxrow); 1188 Cblacs_gridexit(grid->ictxcol); 1189 ierr = PetscFree(grid);CHKERRQ(ierr); 1190 ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr); 1191 } 1192 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1193 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1194 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1195 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1196 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1197 ierr = PetscFree(A->data);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1202 { 1203 PetscErrorCode ierr; 1204 const PetscInt *ranges; 1205 PetscMPIInt size; 1206 PetscInt i,n; 1207 1208 PetscFunctionBegin; 1209 ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr); 1210 if (size>2) { 1211 ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1212 n = ranges[1]-ranges[0]; 1213 for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1214 if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1220 { 1221 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1222 PetscErrorCode ierr; 1223 PetscBLASInt info=0; 1224 1225 PetscFunctionBegin; 1226 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1227 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1228 1229 /* check that the layout is as enforced by MatCreateScaLAPACK */ 1230 ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1231 ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1232 1233 /* compute local sizes */ 1234 ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1235 ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1236 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1237 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1238 a->lld = PetscMax(1,a->locr); 1239 1240 /* allocate local array */ 1241 ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1242 1243 /* set up ScaLAPACK descriptor */ 1244 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1245 PetscCheckScaLapackInfo("descinit",info); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1250 { 1251 PetscErrorCode ierr; 1252 PetscInt nstash,reallocs; 1253 1254 PetscFunctionBegin; 1255 if (A->nooffprocentries) PetscFunctionReturn(0); 1256 ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1257 ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1258 ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1263 { 1264 PetscErrorCode ierr; 1265 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1266 PetscMPIInt n; 1267 PetscInt i,flg,*row,*col; 1268 PetscScalar *val; 1269 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1270 1271 PetscFunctionBegin; 1272 if (A->nooffprocentries) PetscFunctionReturn(0); 1273 while (1) { 1274 ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1275 if (!flg) break; 1276 for (i=0;i<n;i++) { 1277 ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1278 ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1279 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1280 if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process"); 1281 switch (A->insertmode) { 1282 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1283 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1284 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1285 } 1286 } 1287 } 1288 ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1293 { 1294 PetscErrorCode ierr; 1295 Mat Adense,As; 1296 MPI_Comm comm; 1297 1298 PetscFunctionBegin; 1299 ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1300 ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1301 ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1302 ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1303 ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1304 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1305 ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 /* -------------------------------------------------------------------*/ 1310 static struct _MatOps MatOps_Values = { 1311 MatSetValues_ScaLAPACK, 1312 0, 1313 0, 1314 MatMult_ScaLAPACK, 1315 /* 4*/ MatMultAdd_ScaLAPACK, 1316 MatMultTranspose_ScaLAPACK, 1317 MatMultTransposeAdd_ScaLAPACK, 1318 MatSolve_ScaLAPACK, 1319 MatSolveAdd_ScaLAPACK, 1320 0, 1321 /*10*/ 0, 1322 MatLUFactor_ScaLAPACK, 1323 MatCholeskyFactor_ScaLAPACK, 1324 0, 1325 MatTranspose_ScaLAPACK, 1326 /*15*/ MatGetInfo_ScaLAPACK, 1327 0, 1328 MatGetDiagonal_ScaLAPACK, 1329 MatDiagonalScale_ScaLAPACK, 1330 MatNorm_ScaLAPACK, 1331 /*20*/ MatAssemblyBegin_ScaLAPACK, 1332 MatAssemblyEnd_ScaLAPACK, 1333 MatSetOption_ScaLAPACK, 1334 MatZeroEntries_ScaLAPACK, 1335 /*24*/ 0, 1336 MatLUFactorSymbolic_ScaLAPACK, 1337 MatLUFactorNumeric_ScaLAPACK, 1338 MatCholeskyFactorSymbolic_ScaLAPACK, 1339 MatCholeskyFactorNumeric_ScaLAPACK, 1340 /*29*/ MatSetUp_ScaLAPACK, 1341 0, 1342 0, 1343 0, 1344 0, 1345 /*34*/ MatDuplicate_ScaLAPACK, 1346 0, 1347 0, 1348 0, 1349 0, 1350 /*39*/ MatAXPY_ScaLAPACK, 1351 0, 1352 0, 1353 0, 1354 MatCopy_ScaLAPACK, 1355 /*44*/ 0, 1356 MatScale_ScaLAPACK, 1357 MatShift_ScaLAPACK, 1358 0, 1359 0, 1360 /*49*/ 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 /*54*/ 0, 1366 0, 1367 0, 1368 0, 1369 0, 1370 /*59*/ 0, 1371 MatDestroy_ScaLAPACK, 1372 MatView_ScaLAPACK, 1373 0, 1374 0, 1375 /*64*/ 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 /*69*/ 0, 1381 0, 1382 MatConvert_ScaLAPACK_Dense, 1383 0, 1384 0, 1385 /*74*/ 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 /*79*/ 0, 1391 0, 1392 0, 1393 0, 1394 MatLoad_ScaLAPACK, 1395 /*84*/ 0, 1396 0, 1397 0, 1398 0, 1399 0, 1400 /*89*/ 0, 1401 0, 1402 MatMatMultNumeric_ScaLAPACK, 1403 0, 1404 0, 1405 /*94*/ 0, 1406 0, 1407 0, 1408 MatMatTransposeMultNumeric_ScaLAPACK, 1409 0, 1410 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1411 0, 1412 0, 1413 MatConjugate_ScaLAPACK, 1414 0, 1415 /*104*/0, 1416 0, 1417 0, 1418 0, 1419 0, 1420 /*109*/MatMatSolve_ScaLAPACK, 1421 0, 1422 0, 1423 0, 1424 MatMissingDiagonal_ScaLAPACK, 1425 /*114*/0, 1426 0, 1427 0, 1428 0, 1429 0, 1430 /*119*/0, 1431 MatHermitianTranspose_ScaLAPACK, 1432 0, 1433 0, 1434 0, 1435 /*124*/0, 1436 0, 1437 0, 1438 0, 1439 0, 1440 /*129*/0, 1441 0, 1442 0, 1443 0, 1444 0, 1445 /*134*/0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 0, 1451 /*140*/0, 1452 0, 1453 0, 1454 0, 1455 0, 1456 /*145*/0, 1457 0, 1458 0 1459 }; 1460 1461 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1462 { 1463 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1464 PetscInt size=stash->size,nsends; 1465 PetscErrorCode ierr; 1466 PetscInt count,*sindices,**rindices,i,j,l; 1467 PetscScalar **rvalues,*svalues; 1468 MPI_Comm comm = stash->comm; 1469 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1470 PetscMPIInt *sizes,*nlengths,nreceives; 1471 PetscInt *sp_idx,*sp_idy; 1472 PetscScalar *sp_val; 1473 PetscMatStashSpace space,space_next; 1474 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1475 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1476 1477 PetscFunctionBegin; 1478 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1479 InsertMode addv; 1480 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1481 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1482 mat->insertmode = addv; /* in case this processor had no cache */ 1483 } 1484 1485 bs2 = stash->bs*stash->bs; 1486 1487 /* first count number of contributors to each processor */ 1488 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 1489 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 1490 1491 i = j = 0; 1492 space = stash->space_head; 1493 while (space) { 1494 space_next = space->next; 1495 for (l=0; l<space->local_used; l++) { 1496 ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr); 1497 ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr); 1498 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1499 j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1500 nlengths[j]++; owner[i] = j; 1501 i++; 1502 } 1503 space = space_next; 1504 } 1505 1506 /* Now check what procs get messages - and compute nsends. */ 1507 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 1508 for (i=0, nsends=0; i<size; i++) { 1509 if (nlengths[i]) { 1510 sizes[i] = 1; nsends++; 1511 } 1512 } 1513 1514 {PetscMPIInt *onodes,*olengths; 1515 /* Determine the number of messages to expect, their lengths, from from-ids */ 1516 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 1517 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 1518 /* since clubbing row,col - lengths are multiplied by 2 */ 1519 for (i=0; i<nreceives; i++) olengths[i] *=2; 1520 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 1521 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1522 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1523 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 1524 ierr = PetscFree(onodes);CHKERRQ(ierr); 1525 ierr = PetscFree(olengths);CHKERRQ(ierr);} 1526 1527 /* do sends: 1528 1) starts[i] gives the starting index in svalues for stuff going to 1529 the ith processor 1530 */ 1531 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 1532 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 1533 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 1534 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1535 startv[0] = 0; starti[0] = 0; 1536 for (i=1; i<size; i++) { 1537 startv[i] = startv[i-1] + nlengths[i-1]; 1538 starti[i] = starti[i-1] + 2*nlengths[i-1]; 1539 } 1540 1541 i = 0; 1542 space = stash->space_head; 1543 while (space) { 1544 space_next = space->next; 1545 sp_idx = space->idx; 1546 sp_idy = space->idy; 1547 sp_val = space->val; 1548 for (l=0; l<space->local_used; l++) { 1549 j = owner[i]; 1550 if (bs2 == 1) { 1551 svalues[startv[j]] = sp_val[l]; 1552 } else { 1553 PetscInt k; 1554 PetscScalar *buf1,*buf2; 1555 buf1 = svalues+bs2*startv[j]; 1556 buf2 = space->val + bs2*l; 1557 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1558 } 1559 sindices[starti[j]] = sp_idx[l]; 1560 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1561 startv[j]++; 1562 starti[j]++; 1563 i++; 1564 } 1565 space = space_next; 1566 } 1567 startv[0] = 0; 1568 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1569 1570 for (i=0,count=0; i<size; i++) { 1571 if (sizes[i]) { 1572 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 1573 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 1574 } 1575 } 1576 #if defined(PETSC_USE_INFO) 1577 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 1578 for (i=0; i<size; i++) { 1579 if (sizes[i]) { 1580 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1581 } 1582 } 1583 #endif 1584 ierr = PetscFree(nlengths);CHKERRQ(ierr); 1585 ierr = PetscFree(owner);CHKERRQ(ierr); 1586 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 1587 ierr = PetscFree(sizes);CHKERRQ(ierr); 1588 1589 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1590 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 1591 1592 for (i=0; i<nreceives; i++) { 1593 recv_waits[2*i] = recv_waits1[i]; 1594 recv_waits[2*i+1] = recv_waits2[i]; 1595 } 1596 stash->recv_waits = recv_waits; 1597 1598 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 1599 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 1600 1601 stash->svalues = svalues; 1602 stash->sindices = sindices; 1603 stash->rvalues = rvalues; 1604 stash->rindices = rindices; 1605 stash->send_waits = send_waits; 1606 stash->nsends = nsends; 1607 stash->nrecvs = nreceives; 1608 stash->reproduce_count = 0; 1609 PetscFunctionReturn(0); 1610 } 1611 1612 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1613 { 1614 PetscErrorCode ierr; 1615 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1616 1617 PetscFunctionBegin; 1618 if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1619 if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb); 1620 if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb); 1621 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1622 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1623 PetscFunctionReturn(0); 1624 } 1625 1626 /*@ 1627 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of 1628 the ScaLAPACK matrix 1629 1630 Logically Collective on A 1631 1632 Input Parameter: 1633 + A - a MATSCALAPACK matrix 1634 . mb - the row block size 1635 - nb - the column block size 1636 1637 Level: intermediate 1638 1639 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes() 1640 @*/ 1641 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1642 { 1643 PetscErrorCode ierr; 1644 1645 PetscFunctionBegin; 1646 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1647 PetscValidLogicalCollectiveInt(A,mb,2); 1648 PetscValidLogicalCollectiveInt(A,nb,3); 1649 ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr); 1650 PetscFunctionReturn(0); 1651 } 1652 1653 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1654 { 1655 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1656 1657 PetscFunctionBegin; 1658 if (mb) *mb = a->mb; 1659 if (nb) *nb = a->nb; 1660 PetscFunctionReturn(0); 1661 } 1662 1663 /*@ 1664 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of 1665 the ScaLAPACK matrix 1666 1667 Not collective 1668 1669 Input Parameter: 1670 . A - a MATSCALAPACK matrix 1671 1672 Output Parameters: 1673 + mb - the row block size 1674 - nb - the column block size 1675 1676 Level: intermediate 1677 1678 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes() 1679 @*/ 1680 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1681 { 1682 PetscErrorCode ierr; 1683 1684 PetscFunctionBegin; 1685 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1686 ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1691 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1692 1693 /*MC 1694 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1695 1696 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1697 1698 Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver 1699 1700 Options Database Keys: 1701 + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() 1702 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1703 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1704 1705 Level: beginner 1706 1707 .seealso: MATDENSE, MATELEMENTAL 1708 M*/ 1709 1710 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1711 { 1712 Mat_ScaLAPACK *a; 1713 PetscErrorCode ierr; 1714 PetscBool flg,flg1; 1715 Mat_ScaLAPACK_Grid *grid; 1716 MPI_Comm icomm; 1717 PetscBLASInt nprow,npcol,myrow,mycol; 1718 PetscInt optv1,k=2,array[2]={0,0}; 1719 PetscMPIInt size; 1720 1721 PetscFunctionBegin; 1722 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1723 A->insertmode = NOT_SET_VALUES; 1724 1725 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr); 1726 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1727 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1728 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1729 A->stash.ScatterDestroy = NULL; 1730 1731 ierr = PetscNewLog(A,&a);CHKERRQ(ierr); 1732 A->data = (void*)a; 1733 1734 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1735 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1736 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr); 1737 } 1738 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1739 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr); 1740 if (!flg) { 1741 ierr = PetscNewLog(A,&grid);CHKERRQ(ierr); 1742 1743 ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr); 1744 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1745 1746 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr); 1747 ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr); 1748 if (flg1) { 1749 if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size); 1750 grid->nprow = optv1; 1751 } 1752 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1753 1754 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1755 grid->npcol = size/grid->nprow; 1756 ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr); 1757 ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr); 1758 Cblacs_get(-1,0,&grid->ictxt); 1759 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1760 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1761 grid->grid_refct = 1; 1762 grid->nprow = nprow; 1763 grid->npcol = npcol; 1764 grid->myrow = myrow; 1765 grid->mycol = mycol; 1766 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1767 Cblacs_get(-1,0,&grid->ictxrow); 1768 Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1769 Cblacs_get(-1,0,&grid->ictxcol); 1770 Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1771 ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr); 1772 1773 } else grid->grid_refct++; 1774 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1775 a->grid = grid; 1776 a->mb = DEFAULT_BLOCKSIZE; 1777 a->nb = DEFAULT_BLOCKSIZE; 1778 1779 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr); 1780 ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr); 1781 if (flg) { 1782 a->mb = array[0]; 1783 a->nb = (k>1)? array[1]: a->mb; 1784 } 1785 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1786 1787 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr); 1788 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1789 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr); 1790 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 /*@C 1795 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1796 (2D block cyclic distribution). 1797 1798 Collective 1799 1800 Input Parameters: 1801 + comm - MPI communicator 1802 . mb - row block size (or PETSC_DECIDE to have it set) 1803 . nb - column block size (or PETSC_DECIDE to have it set) 1804 . M - number of global rows 1805 . N - number of global columns 1806 . rsrc - coordinate of process that owns the first row of the distributed matrix 1807 - csrc - coordinate of process that owns the first column of the distributed matrix 1808 1809 Output Parameter: 1810 . A - the matrix 1811 1812 Options Database Keys: 1813 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1814 1815 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1816 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1817 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1818 1819 Notes: 1820 If PETSC_DECIDE is used for the block sizes, then an appropriate value 1821 is chosen. 1822 1823 Storage Information: 1824 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1825 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1826 significance and are thus ignored. The block sizes refer to the values 1827 used for the distributed matrix, not the same meaning as in BAIJ. 1828 1829 Level: intermediate 1830 1831 .seealso: MatCreate(), MatCreateDense(), MatSetValues() 1832 @*/ 1833 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1834 { 1835 PetscErrorCode ierr; 1836 Mat_ScaLAPACK *a; 1837 PetscInt m,n; 1838 1839 PetscFunctionBegin; 1840 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1841 ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr); 1842 if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1843 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1844 m = PETSC_DECIDE; 1845 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1846 n = PETSC_DECIDE; 1847 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1848 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1849 a = (Mat_ScaLAPACK*)(*A)->data; 1850 ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr); 1851 ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr); 1856 ierr = MatSetUp(*A);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860