#include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/

const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
                                 "  AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
                                 "            J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
                                 "            G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
                                 "  TITLE = {Sca{LAPACK} Users' Guide},\n"
                                 "  PUBLISHER = {SIAM},\n"
                                 "  ADDRESS = {Philadelphia, PA},\n"
                                 "  YEAR = 1997\n"
                                 "}\n";
static PetscBool ScaLAPACKCite = PETSC_FALSE;

#define DEFAULT_BLOCKSIZE 64

/*
  The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) {
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}

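/*
   Illustrative sketch (not an extra code path; it mirrors what MatDestroy_ScaLAPACK()
   below actually does): the grid attached to a communicator via Petsc_ScaLAPACK_keyval
   is recovered with

     Mat_ScaLAPACK_Grid *grid;
     PetscBool           flg;
     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
*/
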
static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) {
  Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
      PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
      PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
      PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }
  /* convert to dense format and call MatView() */
  PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
  PetscCall(MatView(Adense, viewer));
  PetscCall(MatDestroy(&Adense));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscLogDouble isend[2], irecv[2];

  PetscFunctionBegin;
  info->block_size = 1.0;

  isend[0] = a->lld * a->locc;  /* locally allocated */
  isend[1] = a->locr * a->locc; /* used submatrix */
  if (flag == MAT_LOCAL) {
    info->nz_allocated = isend[0];
    info->nz_used      = isend[1];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
    info->nz_allocated = irecv[0];
    info->nz_used      = irecv[1];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
    info->nz_allocated = irecv[0];
    info->nz_used      = irecv[1];
  }

  info->nz_unneeded       = 0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = 0; /* REVIEW ME */
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_SYMMETRIC:
  case MAT_SORTED_FULL:
  case MAT_HERMITIAN: break;
  case MAT_ROW_ORIENTED: a->roworiented = flg; break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break;
          }
        }
      } else {
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}

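/*
   Note on the indexing above: ScaLAPACK uses 1-based (Fortran) global indices,
   hence the rows[i] + 1 / cols[j] + 1 casts. A sketch of what infog2l computes
   for a 0-based PETSc index pair (i,j) (variable names here are illustrative only):

     gridx = i + 1; gcidx = j + 1;
     SCALAPACKinfog2l_(&gridx, &gcidx, desc, &nprow, &npcol, &myrow, &mycol, &lridx, &lcidx, &rsrc, &csrc);

   On return, (rsrc,csrc) are the grid coordinates of the owning process and
   (lridx,lcidx) are the 1-based indices into that process's local array.
*/
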
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(0);
}

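/*
   The helper above follows the same three-step pattern in both branches:
   (1) describe x and y in the 1d layouts of the PETSc vectors, using the
   single-row/single-column BLACS contexts ictxrow/ictxcol; (2) move them onto
   the 2d process grid with p?gemr2d and apply p?gemv there; (3) move the
   result back to the 1d layout. All MatMult variants below are thin wrappers
   over this routine.
*/
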
static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) {
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) {
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  PetscFunctionBegin;
  if (y != z) PetscCall(VecCopy(y, z));
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(z, &zarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(z, &zarray));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  PetscFunctionBegin;
  if (y != z) PetscCall(VecCopy(y, z));
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(z, &zarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(z, &zarray));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
  PetscScalar    sone = 1.0, zero = 0.0;
  PetscBLASInt   one = 1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
  PetscScalar    sone = 1.0, zero = 0.0;
  PetscBLASInt   one = 1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  PetscFunctionReturn(0);
}

/* --------------------------------------- */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) {
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break;
  case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break;
  default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}

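/*
   Illustrative usage sketch of the MatProduct interface dispatched above
   (a sketch only, assuming A and B are assembled MATSCALAPACK matrices):

     Mat C;
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_AB));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/
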
/* --------------------------------------- */

static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}

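/*
   In the two branches above, after p?gemr2d places the scaling vector on one
   process row (for R) or one process column (for L), the owning processes
   broadcast it along their grid dimension with the BLACS pair Cdgebs2d (send)
   and Cdgebr2d (receive), so that every process holds the factors for its
   local columns/rows before the purely local scaling loop runs.
*/
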
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   n, one = 1;

  PetscFunctionBegin;
  n = x->lld * x->locc;
  PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   i, n;
  PetscScalar    v;

  PetscFunctionBegin;
  n = PetscMin(x->M, x->N);
  for (i = 1; i <= n; i++) {
    PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
    v += alpha;
    PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
  PetscBLASInt   one = 1;
  PetscScalar    beta = 1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y, 1, X, 3);
  PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) {
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatCreate(comm, &Bs));
  PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Bs, MATSCALAPACK));
  b = (Mat_ScaLAPACK *)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;

  PetscFunctionBegin;
  for (i = 0; i < a->locr; i++)
    for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) {
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); }
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) {
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) {
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) {
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}

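/*
   Illustrative usage sketch of the solver registered above (a sketch only,
   with hypothetical vectors b and x and an assembled MATSCALAPACK matrix A):

     Mat           F;
     MatFactorInfo info;
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A, MATSOLVERSCALAPACK, MAT_FACTOR_LU, &F));
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
     PetscCall(MatDestroy(&F));
*/
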
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy;
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  if (lwork) PetscCall(PetscMalloc1(lwork, &work));
  *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, n, nb, isrc, nproc, iproc, *idx;

  PetscFunctionBegin;
  if (rows) {
    n     = a->locr;
    nb    = a->mb;
    isrc  = a->rsrc;
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    PetscCall(PetscMalloc1(n, &idx));
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    n     = a->locc;
    nb    = a->nb;
    isrc  = a->csrc;
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    PetscCall(PetscMalloc1(n, &idx));
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(0);
}

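/*
   The index formula above is the usual 1d block-cyclic local-to-global map:
   for local index i, i / nb is the local block number and i % nb the offset
   within the block, while (nproc + iproc - isrc) % nproc is the position of
   this process in the cycle starting at isrc, giving

     global = (i / nb) * nproc * nb + ((nproc + iproc - isrc) % nproc) * nb + i % nb
*/
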
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}

static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) {
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0];
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE;
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}

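/*
   Illustrative usage sketch of the conversion routines in this file
   (hypothetical dense matrix Adense): the generic MatConvert() entry point
   selects the appropriate routine, exactly as MatLoad_ScaLAPACK() below does:

     Mat As;
     PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
*/
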
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(0);
}

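/*
   For reference, the array descriptor initialized above follows the standard
   ScaLAPACK convention: desc[0] = descriptor type (1 for dense), desc[1] =
   BLACS context, desc[2] = M, desc[3] = N, desc[4] = mb, desc[5] = nb,
   desc[6] = rsrc, desc[7] = csrc, desc[8] = lld.
*/
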
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) {
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break;
      case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break;
      default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) {
  Mat      Adense, As;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
  PetscCall(MatCreate(comm, &Adense));
  PetscCall(MatSetType(Adense, MATDENSE));
  PetscCall(MatLoad(Adense, viewer));
  PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
  PetscCall(MatDestroy(&Adense));
  PetscCall(MatHeaderReplace(newMat, &As));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       0,
                                       0,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       0,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       0,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ 0,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_ScaLAPACK_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_ScaLAPACK,
                                       /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_ScaLAPACK,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       0,
                                       /*99*/ MatProductSetFromOptions_ScaLAPACK,
                                       0,
                                       0,
                                       MatConjugate_ScaLAPACK,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_ScaLAPACK,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0};

static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) {
  PetscInt           *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt            size = stash->size, nsends;
  PetscInt            count, *sindices, **rindices, i, j, l;
  PetscScalar       **rvalues, *svalues;
  MPI_Comm            comm = stash->comm;
  MPI_Request        *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt        *sizes, *nlengths, nreceives;
  PetscInt           *sp_idx, *sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace  space, space_next;
  PetscBLASInt        gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and the from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* indices are sent as (row,col) pairs, so the lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values have length bs2 per entry (undo the earlier factor of 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
     1) startv[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* two sends per destination: one message with the (row,col) indices, one with the values */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]] = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

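  /* interleave so that slot 2*i holds the index-message (tag1) request and slot
     2*i+1 the value-message (tag2) request of the i-th expected message */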
  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the `MATSCALAPACK` matrix

   Logically Collective on A

   Input Parameters:
+  A  - a `MATSCALAPACK` matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(0);
}
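
/*
   Usage sketch (illustrative, assuming a 100x100 matrix): the block sizes must be
   set before the matrix is set up, since MatScaLAPACKSetBlockSizes() errors once
   the matrix is preallocated. Afterwards, MatScaLAPACKGetBlockSizes() below
   returns mb = nb = 32.

     Mat      A;
     PetscInt mb, nb;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATSCALAPACK));
     PetscCall(MatScaLAPACKSetBlockSizes(A, 32, 32));
     PetscCall(MatSetUp(A));
     PetscCall(MatScaLAPACKGetBlockSizes(A, &mb, &nb));
*/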
static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the `MATSCALAPACK` matrix

   Not Collective

   Input Parameter:
.  A - a `MATSCALAPACK` matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to have PETSc installed with ScaLAPACK

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
.  -pc_factor_mat_solver_type scalapack - selects this direct solver, to be used together with the option -pc_type lu
.  -mat_scalapack_grid_height - sets the grid height for the 2D block-cyclic distribution of the internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.

   Level: beginner

.seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
M*/
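
/*
   Example command line (illustrative; "./app" stands for any PETSc program that
   calls MatSetFromOptions() on its matrix):

     mpiexec -n 6 ./app -mat_type scalapack -mat_scalapack_block_sizes 32,32 -mat_scalapack_grid_height 3
*/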
Height", "None", grid->nprow, &optv1, &flg1)); 1681 if (flg1) { 1682 PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1683 grid->nprow = optv1; 1684 } 1685 PetscOptionsEnd(); 1686 1687 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1688 grid->npcol = size / grid->nprow; 1689 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1690 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1691 grid->ictxt = Csys2blacs_handle(icomm); 1692 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1693 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1694 grid->grid_refct = 1; 1695 grid->nprow = nprow; 1696 grid->npcol = npcol; 1697 grid->myrow = myrow; 1698 grid->mycol = mycol; 1699 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1700 grid->ictxrow = Csys2blacs_handle(icomm); 1701 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1702 grid->ictxcol = Csys2blacs_handle(icomm); 1703 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1704 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1705 1706 } else grid->grid_refct++; 1707 PetscCall(PetscCommDestroy(&icomm)); 1708 a->grid = grid; 1709 a->mb = DEFAULT_BLOCKSIZE; 1710 a->nb = DEFAULT_BLOCKSIZE; 1711 1712 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1713 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1714 if (flg) { 1715 a->mb = array[0]; 1716 a->nb = (k > 1) ? array[1] : a->mb; 1717 } 1718 PetscOptionsEnd(); 1719 1720 a->roworiented = PETSC_TRUE; 1721 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1722 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1723 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1724 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /*@C 1729 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1730 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1731 1732 Collective 1733 1734 Input Parameters: 1735 + comm - MPI communicator 1736 . mb - row block size (or `PETSC_DECIDE` to have it set) 1737 . nb - column block size (or `PETSC_DECIDE` to have it set) 1738 . M - number of global rows 1739 . N - number of global columns 1740 . rsrc - coordinate of process that owns the first row of the distributed matrix 1741 - csrc - coordinate of process that owns the first column of the distributed matrix 1742 1743 Output Parameter: 1744 . A - the matrix 1745 1746 Options Database Key: 1747 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1748 1749 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1750 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1751 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1752 1753 Note: 1754 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value 1755 is chosen. 1756 1757 Storage Information: 1758 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1759 configured with ScaLAPACK. 
    grid->npcol = size / grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
    PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
    Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

  } else grid->grid_refct++;
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
  if (flg) {
    a->mb = array[0];
    a->nb = (k > 1) ? array[1] : a->mb;
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
   (2D block cyclic distribution) for a `MATSCALAPACK` matrix

   Collective

   Input Parameters:
+  comm - MPI communicator
.  mb   - row block size (or `PETSC_DECIDE` to have it set)
.  nb   - column block size (or `PETSC_DECIDE` to have it set)
.  M    - number of global rows
.  N    - number of global columns
.  rsrc - coordinate of process that owns the first row of the distributed matrix
-  csrc - coordinate of process that owns the first column of the distributed matrix

   Output Parameter:
.  A - the matrix

   Options Database Key:
.  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   It is recommended that one use `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()` and
   the MatXXXXSetPreallocation() paradigm instead of calling this routine directly
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`].

   Note:
   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen.

   Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
   configured with ScaLAPACK. In particular, PETSc's local sizes lose
   significance and are thus ignored. The block sizes refer to the 2D
   block-cyclic distribution used by ScaLAPACK and do not have the same
   meaning as in `MATBAIJ`.

   Level: intermediate

.seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) {
  Mat_ScaLAPACK *a;
  PetscInt       m, n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATSCALAPACK));
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  a = (Mat_ScaLAPACK *)(*A)->data;
  PetscCall(PetscBLASIntCast(M, &a->M));
  PetscCall(PetscBLASIntCast(N, &a->N));
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
  PetscCall(PetscBLASIntCast(csrc, &a->csrc));
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(0);
}
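
/*
   Usage sketch (illustrative, not part of the PETSc source): create the matrix
   directly and fill it with MatSetValues(); entries destined for other ranks are
   routed through the stash during assembly, as implemented above.

     Mat         A;
     PetscInt    i = 0, j = 0;
     PetscScalar v = 1.0;

     PetscCall(MatCreateScaLAPACK(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 0, 0, &A));
     PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));
*/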