#include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/

/* BibTeX citation registered (via PetscCitationsRegister elsewhere) for the ScaLAPACK users' guide */
const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
                                 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
                                 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
                                 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
                                 " TITLE = {Sca{LAPACK} Users' Guide},\n"
                                 " PUBLISHER = {SIAM},\n"
                                 " ADDRESS = {Philadelphia, PA},\n"
                                 " YEAR = 1997\n"
                                 "}\n";
static PetscBool ScaLAPACKCite = PETSC_FALSE; /* set once the citation has been registered */

/* default ScaLAPACK block size used when the user does not request one */
#define DEFAULT_BLOCKSIZE 64

/*
  The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

/* Release the MPI keyval used to cache the BLACS process grid on a communicator
   (invoked at finalization; resets the keyval to MPI_KEYVAL_INVALID) */
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}

/* Viewer for MATSCALAPACK: ASCII info formats print distribution data only;
   any other format converts to MATDENSE and delegates to MatView() */
static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
{
  Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
      PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
      PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
      PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n",
(int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(0); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(0); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(0); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, 
            PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Insert/add values: each (row,col) is mapped to its owning process and local indices
   via infog2l; locally owned entries are written directly into the 2d-cyclic local
   array, off-process entries go through the stash for communication at assembly */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue; /* negative indices are silently skipped (PETSc convention) */
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* +1: ScaLAPACK indices are 1-based */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        /* entry is owned locally; vals is indexed row- or column-oriented */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
            break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
            break;
          }
        }
      } else {
        /* off-process entry: stash it one value at a time */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ?
          vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Common kernel for MatMult/MatMultTranspose(+Add): computes y = op(A)*x + beta*y.
   The 1d block-distributed PETSc vectors are redistributed (gemr2d) to/from
   2d-distributed row/column vectors matching A's process grid before calling pgemv. */
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors (sizes from numroc for this process's grid coordinates) */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when beta contributes) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine: y2d = alpha*A^T*x2d + beta*y2d */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution)
     */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when beta contributes) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine: y2d = alpha*A*x2d + beta*y2d */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(0);
}

/* y = A*x */
static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
{
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(0);
}

/* y = A^T*x */
static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
{
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(0);
}

/* z = A*x + y */
static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
{
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  PetscFunctionBegin;
  if (y != z) PetscCall(VecCopy(y, z)); /* accumulate into z, so seed it with y */
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(z, &zarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(z, &zarray));
  PetscFunctionReturn(0);
}

/* z = A^T*x + y */
static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
{
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  PetscFunctionBegin;
  if (y != z) PetscCall(VecCopy(y, z));
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(z, &zarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(z, &zarray));
  PetscFunctionReturn(0);
}

/* C = A*B via pgemm; assumes A, B, C all live on the same process grid */
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
  PetscScalar    sone = 1.0, zero = 0.0;
  PetscBLASInt   one = 1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* symbolic phase for C = A*B: size and set up C, install the numeric kernel */
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C,
MATSCALAPACK)); 323 PetscCall(MatSetUp(C)); 324 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 325 PetscFunctionReturn(0); 326 } 327 328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 329 { 330 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 331 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 332 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 333 PetscScalar sone = 1.0, zero = 0.0; 334 PetscBLASInt one = 1; 335 336 PetscFunctionBegin; 337 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 338 C->assembled = PETSC_TRUE; 339 PetscFunctionReturn(0); 340 } 341 342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 343 { 344 PetscFunctionBegin; 345 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 346 PetscCall(MatSetType(C, MATSCALAPACK)); 347 PetscCall(MatSetUp(C)); 348 PetscFunctionReturn(0); 349 } 350 351 /* --------------------------------------- */ 352 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 353 { 354 PetscFunctionBegin; 355 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 356 C->ops->productsymbolic = MatProductSymbolic_AB; 357 PetscFunctionReturn(0); 358 } 359 360 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 361 { 362 PetscFunctionBegin; 363 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 364 C->ops->productsymbolic = MatProductSymbolic_ABt; 365 PetscFunctionReturn(0); 366 } 367 368 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 369 { 370 Mat_Product *product = C->product; 371 372 PetscFunctionBegin; 373 switch (product->type) { 374 case MATPRODUCT_AB: 375 PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 376 break; 377 case MATPRODUCT_ABt: 378 PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 379 break; 380 default: 381 
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
/* --------------------------------------- */

/* Extract the diagonal into D; the diagonal is gathered element-wise (elget/elset)
   into a 2d-distributed vector and then redistributed to D's 1d layout. The row
   version is used when M<=N (D has row layout), the column version otherwise. */
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal: broadcast each A(j,j) along its process row, store in d2d(j,1) */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal: broadcast each A(j,j) along its process column, store in d2d(1,j) */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}

/* A = diag(L)*A*diag(R): each scaling vector is redistributed to the 2d grid,
   broadcast along the orthogonal grid direction, then applied locally */
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, all other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling: column j of the local block is scaled by d2d[j] */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc,
                            &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, all other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling: row i of the local block is scaled by d2d[i] */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}

/* Dense storage: the diagonal is never "missing" */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* X *= a; scales the whole local array including the lld padding rows,
   which is harmless since padded entries are never read as matrix data */
static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   n, one = 1;

  PetscFunctionBegin;
  n = x->lld * x->locc;
  PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
  PetscFunctionReturn(0);
}

/* X += alpha*I, applied element-wise on the min(M,N) diagonal entries */
static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   i, n;
  PetscScalar    v;

  PetscFunctionBegin;
  n = PetscMin(x->M, x->N);
  for (i = 1; i <= n; i++) {
    PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
    v += alpha;
    PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X via pmatadd; both matrices must share the same distribution */
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
  PetscBLASInt   one = 1;
  PetscScalar    beta = 1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y, 1, X, 3);
  PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}

/* B = A by raw copy of the local array.
   NOTE(review): assumes A and B have identical local layout (lld, locc) — no
   distribution check is performed here, unlike MatAXPY above; confirm callers */
static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Create a matrix with the same sizes, block sizes and grid origin as A,
   optionally copying the local values */
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatCreate(comm, &Bs));
  PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Bs, MATSCALAPACK));
  b = (Mat_ScaLAPACK *)Bs->data;
  /* mirror A's distribution parameters before MatSetUp allocates storage */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* B = A^T; only MAT_INITIAL_MATRIX is supported. In complex arithmetic the PBLAS
   routine conjugates, so the conjugation is undone element-wise afterwards */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* swap row/column block sizes and grid origin for the transposed layout */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* A = conj(A), applied element-wise on the local block */
static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;

  PetscFunctionBegin;
  for (i = 0; i < a->locr; i++)
    for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
  PetscFunctionReturn(0);
}

/* B = A^H; only MAT_INITIAL_MATRIX is supported (PBLAStran conjugates in complex) */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static
/* Solve A*X = B using the stored LU or Cholesky factors; the right-hand side is
   redistributed to the 2d grid, solved with getrs/potrs, and redistributed back */
PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X)); /* solve in place on a copy of B */
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine appropriate to the factorization stored in A */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(0);
}

/* X = A^{-1}*B + Y */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
{
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(0);
}

/* Solve A*X = B with multiple right-hand sides; B and X must be MATSCALAPACK
   with matching distributions, and X already has B's layout so the factors can
   be applied in place on a copy of B's local array */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}

/* In-place LU factorization (pgetrf); row/col orderings are ignored */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a =
    (Mat_ScaLAPACK *)A->data;
  PetscBLASInt one = 1, info;

  PetscFunctionBegin;
  /* pivot array sized locr+mb as required by p?getrf (LOCr(M)+MB entries) */
  if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); }
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

/* Numeric LU: copy A into F and factor F in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(0);
}

/* Symbolic LU is a no-op for dense ScaLAPACK storage */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine.
  */
  PetscFunctionReturn(0);
}

/*
   In-place Cholesky factorization via p?potrf, using the lower triangular part.
   The perm and factorinfo arguments are not used.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

/* Numeric Cholesky: copies A into the factor matrix F and factors F in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(0);
}

/* Symbolic Cholesky is a no-op for this dense format */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine.
  */
  PetscFunctionReturn(0);
}

/* Reports the solver package name for factors created by this package */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}

/* Creates an empty MATSCALAPACK factor matrix F with the same sizes, block sizes, and source process as A */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* the symbolic factorization routines above do no work */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}

/* Registers LU and Cholesky factorizations of MATSCALAPACK with the solver-type registry */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}

/*
   Computes the 1-, Frobenius, or infinity norm with p?lange.
   The 1- and infinity norms need a work array of length max(locr,locc);
   the Frobenius norm needs none, so a dummy address is passed instead.
*/
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy;
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  if (lwork)
PetscCall(PetscMalloc1(lwork, &work));
  *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}

/* Zeros the entire local array (all lld*locc allocated entries) */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(0);
}

/*
   Builds index sets with the global row/column indices owned by this process
   under the 2d block-cyclic distribution.  Either output argument may be NULL.
*/
static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, n, nb, isrc, nproc, iproc, *idx;

  PetscFunctionBegin;
  if (rows) {
    n     = a->locr;
    nb    = a->mb;
    isrc  = a->rsrc;
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    PetscCall(PetscMalloc1(n, &idx));
    /* map local index i to its global index in the block-cyclic ordering */
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    n     = a->locc;
    nb    = a->nb;
    isrc  = a->csrc;
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    PetscCall(PetscMalloc1(n, &idx));
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(0);
}

/*
   Converts to MATDENSE with a 1d block distribution via p?gemr2d.  When the
   target matrix is reused and its local sizes differ from A's layout, the
   redistribution goes through an auxiliary dense matrix and MatSetValues().
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}

/*
   Checks whether a PETSc layout is compatible with ScaLAPACK: every process
   must have the same local size, except possibly the last one, which may be
   smaller.
*/
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0];
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    *correct = (PetscBool)(i == size || (i == size - 1 &&
ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}

/*
   Converts a dense matrix to MATSCALAPACK.  If the layout of A is compatible
   with a 1d ScaLAPACK block distribution, the redistribution is done with
   p?gemr2d; otherwise the entries are routed through MatSetValues() using the
   ownership index sets of the target distribution.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE; /* gemr2d already placed everything; assembly has nothing to communicate */
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* the dense array is column oriented */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}

/* Converts an AIJ matrix to MATSCALAPACK, row by row via MatGetRow()/MatSetValues() */
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/*
   Converts an SBAIJ matrix to MATSCALAPACK.  Only the upper triangular part is
   stored in A, so the strictly lower triangular entries are regenerated from it
   (conjugated when the matrix is Hermitian).
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v = A->hermitian == PETSC_BOOL3_TRUE ?
PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Allocates the zero-initialized local array of lld*locc entries (freeing any previous one) */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  /* checked multiply guards against PetscInt overflow of lld*locc */
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*
   Destroys the matrix: frees local storage and pivots, decrements the
   reference count of the BLACS grid cached on the communicator (exiting the
   grid contexts when the last matrix using the grid goes away), and removes
   the composed methods.
*/
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

/*
   Sets up the matrix: validates that the PETSc layouts are ScaLAPACK
   compatible, computes the local sizes with numroc, allocates the local
   array, and initializes the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(0);
}

/* Begins assembly: starts communication of stashed off-process entries */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
{
  PetscInt nstash, reallocs;
  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/*
   Completes assembly: receives stashed (row,col,value) triples, maps each
   global index pair to local indices with infog2l, and inserts or adds into
   the local array according to the insert mode.
*/
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); /* ScaLAPACK indices are 1-based */
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}

/* Loads the matrix by reading a MATDENSE matrix from the viewer and converting it */
PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
{
  Mat      Adense, As;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
  PetscCall(MatCreate(comm, &Adense));
  PetscCall(MatSetType(Adense, MATDENSE));
  PetscCall(MatLoad(Adense, viewer));
  PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
  PetscCall(MatDestroy(&Adense));
  PetscCall(MatHeaderReplace(newMat, &As));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK; zero entries are unimplemented operations */
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       0,
                                       0,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       0,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       0,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ 0,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_ScaLAPACK_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_ScaLAPACK,
                                       /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_ScaLAPACK,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       0,
                                       /*99*/ MatProductSetFromOptions_ScaLAPACK,
                                       0,
                                       0,
                                       MatConjugate_ScaLAPACK,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_ScaLAPACK,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0};

/*
   Begins communication of stashed (i,j,value) entries.  Unlike the reference
   stash implementation, the destination rank of each entry is determined by
   the 2d block-cyclic distribution (infog2l + Cblacs_pnum) rather than by a
   contiguous row-ownership table; the owners argument is therefore unused.
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i     = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); /* 1-based indices for ScaLAPACK */
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); /* rank owning this entry in the 2d grid */
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, from from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2 */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* row index in the first half of the message, column index in the second */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

/* Implementation of MatScaLAPACKSetBlockSizes(); PETSC_DECIDE selects the default sizes */
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); /* nb defaults to mb */
  PetscFunctionReturn(0);
}

/*@
  MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
  the `MATSCALAPACK` matrix

  Logically Collective

  Input Parameters:
+ A - a `MATSCALAPACK` matrix
.
mb - the row block size
- nb - the column block size

  Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(0);
}

/* Implementation of MatScaLAPACKGetBlockSizes(); either output may be NULL */
static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
  MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
  the `MATSCALAPACK` matrix

  Not collective

  Input Parameter:
. A - a `MATSCALAPACK` matrix

  Output Parameters:
+ mb - the row block size
- nb - the column block size

  Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
.  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
.  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.
1721 1722 Level: beginner 1723 1724 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()` 1725 M*/ 1726 1727 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1728 { 1729 Mat_ScaLAPACK *a; 1730 PetscBool flg, flg1; 1731 Mat_ScaLAPACK_Grid *grid; 1732 MPI_Comm icomm; 1733 PetscBLASInt nprow, npcol, myrow, mycol; 1734 PetscInt optv1, k = 2, array[2] = {0, 0}; 1735 PetscMPIInt size; 1736 1737 PetscFunctionBegin; 1738 PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps))); 1739 A->insertmode = NOT_SET_VALUES; 1740 1741 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1742 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1743 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1744 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1745 A->stash.ScatterDestroy = NULL; 1746 1747 PetscCall(PetscNew(&a)); 1748 A->data = (void *)a; 1749 1750 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1751 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1752 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 1753 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1754 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1755 } 1756 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1757 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1758 if (!flg) { 1759 PetscCall(PetscNew(&grid)); 1760 1761 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1762 grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1763 1764 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1765 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1766 if (flg1) { 1767 
PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1768 grid->nprow = optv1; 1769 } 1770 PetscOptionsEnd(); 1771 1772 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1773 grid->npcol = size / grid->nprow; 1774 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1775 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1776 grid->ictxt = Csys2blacs_handle(icomm); 1777 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1778 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1779 grid->grid_refct = 1; 1780 grid->nprow = nprow; 1781 grid->npcol = npcol; 1782 grid->myrow = myrow; 1783 grid->mycol = mycol; 1784 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1785 grid->ictxrow = Csys2blacs_handle(icomm); 1786 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1787 grid->ictxcol = Csys2blacs_handle(icomm); 1788 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1789 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1790 1791 } else grid->grid_refct++; 1792 PetscCall(PetscCommDestroy(&icomm)); 1793 a->grid = grid; 1794 a->mb = DEFAULT_BLOCKSIZE; 1795 a->nb = DEFAULT_BLOCKSIZE; 1796 1797 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1798 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1799 if (flg) { 1800 a->mb = array[0]; 1801 a->nb = (k > 1) ? 
array[1] : a->mb; 1802 } 1803 PetscOptionsEnd(); 1804 1805 a->roworiented = PETSC_TRUE; 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1809 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 /*@C 1814 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1815 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1816 1817 Collective 1818 1819 Input Parameters: 1820 + comm - MPI communicator 1821 . mb - row block size (or `PETSC_DECIDE` to have it set) 1822 . nb - column block size (or `PETSC_DECIDE` to have it set) 1823 . M - number of global rows 1824 . N - number of global columns 1825 . rsrc - coordinate of process that owns the first row of the distributed matrix 1826 - csrc - coordinate of process that owns the first column of the distributed matrix 1827 1828 Output Parameter: 1829 . A - the matrix 1830 1831 Options Database Key: 1832 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1833 1834 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1835 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1836 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1837 1838 Note: 1839 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value 1840 is chosen. 1841 1842 Storage Information: 1843 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1844 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1845 significance and are thus ignored. 
The block sizes refer to the values
   used for the distributed matrix, not the same meaning as in `MATBAIJ`.

   Level: intermediate

.seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
{
  Mat_ScaLAPACK *a;
  PetscInt m, n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATSCALAPACK));
  /* global dimensions are mandatory here; the 2D block-cyclic layout cannot
     be derived from per-rank local sizes */
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  a = (Mat_ScaLAPACK *)(*A)->data;
  PetscCall(PetscBLASIntCast(M, &a->M));
  PetscCall(PetscBLASIntCast(N, &a->N));
  /* PETSC_DECIDE block sizes fall back to the default; nb additionally
     defaults to mb so one explicit value yields square blocks */
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
  PetscCall(PetscBLASIntCast(csrc, &a->csrc));
  /* MatSetUp() allocates the local array; block sizes must be final by now */
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(0);
}