1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(PETSC_SUCCESS); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer, &format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109 } 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116 PetscInt i, j; 117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122 for (i = 0; i < nr; i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125 for (j = 0; j < nc; j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: 133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134 break; 135 default: 136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137 break; 138 } 139 } else { 140 switch (imode) { 141 case INSERT_VALUES: 142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143 break; 144 default: 145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146 break; 147 } 148 } 149 } else { 150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151 A->assembled = PETSC_FALSE; 152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153 } 154 } 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160 { 161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162 PetscScalar *x2d, *y2d, alpha = 1.0; 163 const PetscInt *ranges; 164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165 166 PetscFunctionBegin; 167 if (transpose) { 168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171 xlld = PetscMax(1, A->rmap->n); 172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173 PetscCheckScaLapackInfo("descinit", info); 174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176 ylld = 1; 177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178 PetscCheckScaLapackInfo("descinit", info); 179 180 /* allocate 2d vectors */ 181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184 xlld = PetscMax(1, lszx); 185 186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188 PetscCheckScaLapackInfo("descinit", info); 189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190 PetscCheckScaLapackInfo("descinit", info); 191 192 /* redistribute x as a column of a 2d matrix */ 193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194 195 /* redistribute y as a row of a 2d matrix */ 196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197 198 /* call PBLAS subroutine */ 199 if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200 else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 201 202 /* redistribute y from a row of a 2d matrix */ 203 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 204 205 } else { /* non-transpose */ 206 207 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 208 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 209 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 210 xlld = 1; 211 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 212 PetscCheckScaLapackInfo("descinit", info); 213 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 214 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 215 ylld = PetscMax(1, A->rmap->n); 216 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 217 PetscCheckScaLapackInfo("descinit", info); 218 219 /* allocate 2d vectors */ 220 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 221 lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 222 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 223 ylld = PetscMax(1, lszy); 224 225 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 226 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 227 PetscCheckScaLapackInfo("descinit", info); 228 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 229 PetscCheckScaLapackInfo("descinit", info); 230 231 /* redistribute x as a row of a 2d matrix */ 232 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 233 234 /* redistribute y as a column of a 2d matrix */ 235 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 236 237 /* call PBLAS subroutine */ 238 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 239 240 /* redistribute y from a column of a 2d matrix */ 241 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 242 } 243 PetscCall(PetscFree2(x2d, y2d)); 244 PetscFunctionReturn(PETSC_SUCCESS); 245 } 246 247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 248 { 249 const PetscScalar *xarray; 250 PetscScalar *yarray; 251 252 PetscFunctionBegin; 253 PetscCall(VecGetArrayRead(x, &xarray)); 254 PetscCall(VecGetArray(y, &yarray)); 255 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); 256 PetscCall(VecRestoreArrayRead(x, &xarray)); 257 PetscCall(VecRestoreArray(y, &yarray)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 262 { 263 const PetscScalar *xarray; 264 PetscScalar *yarray; 265 266 PetscFunctionBegin; 267 PetscCall(VecGetArrayRead(x, &xarray)); 268 PetscCall(VecGetArray(y, &yarray)); 269 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); 270 PetscCall(VecRestoreArrayRead(x, &xarray)); 271 PetscCall(VecRestoreArray(y, &yarray)); 272 PetscFunctionReturn(PETSC_SUCCESS); 273 } 274 275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 276 { 277 const PetscScalar *xarray; 278 PetscScalar *yarray; 279 280 PetscFunctionBegin; 281 PetscCall(VecGetArrayRead(x, &xarray)); 282 PetscCall(VecGetArrayWrite(y, &yarray)); 283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); 284 PetscCall(VecRestoreArrayRead(x, &xarray)); 285 PetscCall(VecRestoreArrayWrite(y, &yarray)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290 { 291 const PetscScalar *xarray; 292 PetscScalar *zarray; 293 294 PetscFunctionBegin; 295 if (y != z) PetscCall(VecCopy(y, z)); 296 PetscCall(VecGetArrayRead(x, &xarray)); 297 PetscCall(VecGetArray(z, &zarray)); 298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); 299 PetscCall(VecRestoreArrayRead(x, &xarray)); 300 PetscCall(VecRestoreArray(z, &zarray)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 305 { 306 const PetscScalar *xarray; 307 PetscScalar *zarray; 308 309 PetscFunctionBegin; 310 if (y != z) PetscCall(VecCopy(y, z)); 311 PetscCall(VecGetArrayRead(x, &xarray)); 312 PetscCall(VecGetArray(z, &zarray)); 313 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); 314 PetscCall(VecRestoreArrayRead(x, &xarray)); 315 PetscCall(VecRestoreArray(z, &zarray)); 316 PetscFunctionReturn(PETSC_SUCCESS); 317 } 318 319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 320 { 321 const PetscScalar *xarray; 322 PetscScalar *zarray; 323 324 PetscFunctionBegin; 325 if (y != z) PetscCall(VecCopy(y, z)); 326 PetscCall(VecGetArrayRead(x, &xarray)); 327 PetscCall(VecGetArray(z, &zarray)); 328 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); 329 PetscCall(VecRestoreArrayRead(x, &xarray)); 330 PetscCall(VecRestoreArray(z, &zarray)); 331 PetscFunctionReturn(PETSC_SUCCESS); 332 } 333 334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 335 { 336 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 337 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 338 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 339 PetscScalar sone = 1.0, zero = 0.0; 340 PetscBLASInt one = 1; 341 342 PetscFunctionBegin; 343 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 344 C->assembled = PETSC_TRUE; 345 PetscFunctionReturn(PETSC_SUCCESS); 346 } 347 348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 349 { 350 PetscFunctionBegin; 351 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 352 PetscCall(MatSetType(C, MATSCALAPACK)); 353 PetscCall(MatSetUp(C)); 354 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 359 { 360 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 361 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 362 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 363 PetscScalar sone = 1.0, zero = 0.0; 364 PetscBLASInt one = 1; 365 366 PetscFunctionBegin; 367 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 368 C->assembled = PETSC_TRUE; 369 PetscFunctionReturn(PETSC_SUCCESS); 370 } 371 372 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 373 { 374 PetscFunctionBegin; 375 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 376 PetscCall(MatSetType(C, MATSCALAPACK)); 377 PetscCall(MatSetUp(C)); 378 PetscFunctionReturn(PETSC_SUCCESS); 379 } 380 381 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 382 { 383 PetscFunctionBegin; 384 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 385 C->ops->productsymbolic = MatProductSymbolic_AB; 386 PetscFunctionReturn(PETSC_SUCCESS); 387 } 388 389 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 390 { 391 PetscFunctionBegin; 392 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 393 C->ops->productsymbolic = MatProductSymbolic_ABt; 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 397 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 398 { 399 Mat_Product *product = C->product; 400 401 PetscFunctionBegin; 402 switch (product->type) { 403 case MATPRODUCT_AB: 404 PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 405 break; 406 case MATPRODUCT_ABt: 407 PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 408 break; 409 default: 410 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 411 } 412 PetscFunctionReturn(PETSC_SUCCESS); 413 } 414 415 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 416 { 417 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 418 PetscScalar *darray, *d2d, v; 419 const PetscInt *ranges; 420 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 421 422 PetscFunctionBegin; 423 PetscCall(VecGetArray(D, &darray)); 424 425 if (A->rmap->N <= A->cmap->N) { /* row version */ 426 427 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 428 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 429 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 430 dlld = PetscMax(1, A->rmap->n); 431 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 432 PetscCheckScaLapackInfo("descinit", info); 433 434 /* allocate 2d vector */ 435 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 436 PetscCall(PetscCalloc1(lszd, &d2d)); 437 dlld = PetscMax(1, lszd); 438 439 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 440 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 441 PetscCheckScaLapackInfo("descinit", info); 442 443 /* collect diagonal */ 444 for (j = 1; j <= a->M; j++) { 445 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 446 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 447 } 448 449 /* redistribute d from a column of a 2d matrix */ 450 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 451 PetscCall(PetscFree(d2d)); 452 453 } else { /* column version */ 454 455 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 456 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 457 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 458 dlld = 1; 459 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 460 PetscCheckScaLapackInfo("descinit", info); 461 462 /* allocate 2d vector */ 463 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 464 PetscCall(PetscCalloc1(lszd, &d2d)); 465 466 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 467 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 468 PetscCheckScaLapackInfo("descinit", info); 469 470 /* collect diagonal */ 471 for (j = 1; j <= a->N; j++) { 472 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 473 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 474 } 475 476 /* redistribute d from a row of a 2d matrix */ 477 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 478 PetscCall(PetscFree(d2d)); 479 } 480 481 PetscCall(VecRestoreArray(D, &darray)); 482 PetscCall(VecAssemblyBegin(D)); 483 PetscCall(VecAssemblyEnd(D)); 484 PetscFunctionReturn(PETSC_SUCCESS); 485 } 486 487 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 488 { 489 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 490 const PetscScalar *d; 491 const PetscInt *ranges; 492 PetscScalar *d2d; 493 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 494 495 PetscFunctionBegin; 496 if (R) { 497 PetscCall(VecGetArrayRead(R, &d)); 498 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 499 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 500 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 501 dlld = 1; 502 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 503 PetscCheckScaLapackInfo("descinit", info); 504 505 /* allocate 2d vector */ 506 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 507 PetscCall(PetscCalloc1(lszd, &d2d)); 508 509 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 510 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 511 PetscCheckScaLapackInfo("descinit", info); 512 513 /* redistribute d to a row of a 2d matrix */ 514 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 515 516 /* broadcast along process columns */ 517 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 518 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 519 520 /* local scaling */ 521 for (j = 0; j < a->locc; j++) 522 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 523 524 PetscCall(PetscFree(d2d)); 525 PetscCall(VecRestoreArrayRead(R, &d)); 526 } 527 if (L) { 528 PetscCall(VecGetArrayRead(L, &d)); 529 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 530 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 531 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 532 dlld = PetscMax(1, A->rmap->n); 533 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 534 PetscCheckScaLapackInfo("descinit", info); 535 536 /* allocate 2d vector */ 537 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 538 PetscCall(PetscCalloc1(lszd, &d2d)); 539 dlld = PetscMax(1, lszd); 540 541 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 542 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 543 PetscCheckScaLapackInfo("descinit", info); 544 545 /* redistribute d to a column of a 2d matrix */ 546 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 547 548 /* broadcast along process rows */ 549 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 550 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 551 552 /* local scaling */ 553 for (i = 0; i < a->locr; i++) 554 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 555 556 PetscCall(PetscFree(d2d)); 557 PetscCall(VecRestoreArrayRead(L, &d)); 558 } 559 PetscFunctionReturn(PETSC_SUCCESS); 560 } 561 562 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 563 { 564 PetscFunctionBegin; 565 *missing = PETSC_FALSE; 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 570 { 571 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 572 PetscBLASInt n, one = 1; 573 574 PetscFunctionBegin; 575 n = x->lld * x->locc; 576 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 577 PetscFunctionReturn(PETSC_SUCCESS); 578 } 579 580 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 581 { 582 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 583 PetscBLASInt i, n; 584 PetscScalar v; 585 586 PetscFunctionBegin; 587 n = PetscMin(x->M, x->N); 588 for (i = 1; i <= n; i++) { 589 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 590 v += alpha; 591 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 592 } 593 PetscFunctionReturn(PETSC_SUCCESS); 594 } 595 596 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 597 { 598 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 599 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 600 PetscBLASInt one = 1; 601 PetscScalar beta = 1.0; 602 603 PetscFunctionBegin; 604 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 605 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 606 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 607 PetscFunctionReturn(PETSC_SUCCESS); 608 } 609 610 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 611 { 612 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 613 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 614 615 PetscFunctionBegin; 616 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 617 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 618 PetscFunctionReturn(PETSC_SUCCESS); 619 } 620 621 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 622 { 623 Mat Bs; 624 MPI_Comm comm; 625 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 626 627 PetscFunctionBegin; 628 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 629 PetscCall(MatCreate(comm, &Bs)); 630 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 631 PetscCall(MatSetType(Bs, MATSCALAPACK)); 632 b = (Mat_ScaLAPACK *)Bs->data; 633 b->M = a->M; 634 b->N = a->N; 635 b->mb = a->mb; 636 b->nb = a->nb; 637 b->rsrc = a->rsrc; 638 b->csrc = a->csrc; 639 PetscCall(MatSetUp(Bs)); 640 *B = Bs; 641 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 642 Bs->assembled = PETSC_TRUE; 643 PetscFunctionReturn(PETSC_SUCCESS); 644 } 645 646 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 647 { 648 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 649 Mat Bs = *B; 650 PetscBLASInt one = 1; 651 PetscScalar sone = 1.0, zero = 0.0; 652 #if defined(PETSC_USE_COMPLEX) 653 PetscInt i, j; 654 #endif 655 656 PetscFunctionBegin; 657 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 658 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 659 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 660 *B = Bs; 661 b = (Mat_ScaLAPACK *)Bs->data; 662 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 663 #if defined(PETSC_USE_COMPLEX) 664 /* undo conjugation */ 665 for (i = 0; i < b->locr; i++) 666 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 667 #endif 668 Bs->assembled = PETSC_TRUE; 669 PetscFunctionReturn(PETSC_SUCCESS); 670 } 671 672 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 673 { 674 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 675 PetscInt i, j; 676 677 PetscFunctionBegin; 678 for (i = 0; i < a->locr; i++) 679 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 680 PetscFunctionReturn(PETSC_SUCCESS); 681 } 682 683 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 684 { 685 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 686 Mat Bs = *B; 687 PetscBLASInt one = 1; 688 PetscScalar sone = 1.0, zero = 0.0; 689 690 PetscFunctionBegin; 691 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 692 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 693 *B = Bs; 694 b = (Mat_ScaLAPACK *)Bs->data; 695 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 696 Bs->assembled = PETSC_TRUE; 697 PetscFunctionReturn(PETSC_SUCCESS); 698 } 699 700 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 701 { 702 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 703 PetscScalar *x, *x2d; 704 const PetscInt *ranges; 705 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 706 707 PetscFunctionBegin; 708 PetscCall(VecCopy(B, X)); 709 PetscCall(VecGetArray(X, &x)); 710 711 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 712 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 713 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 714 xlld = PetscMax(1, A->rmap->n); 715 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 716 PetscCheckScaLapackInfo("descinit", info); 717 718 /* allocate 2d vector */ 719 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 720 PetscCall(PetscMalloc1(lszx, &x2d)); 721 xlld = PetscMax(1, lszx); 722 723 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 724 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 725 PetscCheckScaLapackInfo("descinit", info); 726 727 /* redistribute x as a column of a 2d matrix */ 728 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 729 730 /* call ScaLAPACK subroutine */ 731 switch (A->factortype) { 732 case MAT_FACTOR_LU: 733 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 734 PetscCheckScaLapackInfo("getrs", info); 735 break; 736 case MAT_FACTOR_CHOLESKY: 737 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 738 PetscCheckScaLapackInfo("potrs", info); 739 break; 740 default: 741 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 742 } 743 744 /* redistribute x from a column of a 2d matrix */ 745 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 746 747 PetscCall(PetscFree(x2d)); 748 PetscCall(VecRestoreArray(X, &x)); 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 753 { 754 PetscFunctionBegin; 755 PetscCall(MatSolve_ScaLAPACK(A, B, X)); 756 PetscCall(VecAXPY(X, 1, Y)); 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 761 { 762 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 763 PetscBool flg1, flg2; 764 PetscBLASInt one = 1, info; 765 766 PetscFunctionBegin; 767 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 768 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 769 PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 770 MatScaLAPACKCheckDistribution(B, 1, X, 2); 771 b = (Mat_ScaLAPACK *)B->data; 772 x = (Mat_ScaLAPACK *)X->data; 773 PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 774 775 switch (A->factortype) { 776 case MAT_FACTOR_LU: 777 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 778 PetscCheckScaLapackInfo("getrs", info); 779 break; 780 case MAT_FACTOR_CHOLESKY: 781 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 782 PetscCheckScaLapackInfo("potrs", info); 783 break; 784 default: 785 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 786 } 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 791 { 792 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 793 PetscBLASInt one = 1, info; 794 795 PetscFunctionBegin; 796 if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 797 PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 798 PetscCheckScaLapackInfo("getrf", info); 799 A->factortype = MAT_FACTOR_LU; 800 A->assembled = PETSC_TRUE; 801 802 PetscCall(PetscFree(A->solvertype)); 803 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 804 PetscFunctionReturn(PETSC_SUCCESS); 805 } 806 807 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 808 { 809 PetscFunctionBegin; 810 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 811 PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 812 PetscFunctionReturn(PETSC_SUCCESS); 813 } 814 815 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 816 { 817 PetscFunctionBegin; 818 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 823 { 824 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 825 PetscBLASInt one = 1, info; 826 827 PetscFunctionBegin; 828 PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 829 PetscCheckScaLapackInfo("potrf", info); 830 A->factortype = MAT_FACTOR_CHOLESKY; 831 A->assembled = PETSC_TRUE; 832 833 PetscCall(PetscFree(A->solvertype)); 834 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 835 PetscFunctionReturn(PETSC_SUCCESS); 836 } 837 838 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 839 { 840 PetscFunctionBegin; 841 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 842 PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 843 PetscFunctionReturn(PETSC_SUCCESS); 844 } 845 846 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 847 { 848 PetscFunctionBegin; 849 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 854 { 855 PetscFunctionBegin; 856 *type = MATSOLVERSCALAPACK; 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 861 { 862 Mat B; 863 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 864 865 PetscFunctionBegin; 866 /* Create the factorization matrix */ 867 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 868 B->trivialsymbolic = PETSC_TRUE; 869 B->factortype = ftype; 870 PetscCall(PetscFree(B->solvertype)); 871 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 872 873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 874 *F = B; 875 PetscFunctionReturn(PETSC_SUCCESS); 876 } 877 878 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 879 { 880 PetscFunctionBegin; 881 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 882 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 883 PetscFunctionReturn(PETSC_SUCCESS); 884 } 885 886 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 887 { 888 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 889 PetscBLASInt one = 1, lwork = 0; 890 const char *ntype; 891 PetscScalar *work = NULL, dummy; 892 893 PetscFunctionBegin; 894 switch (type) { 895 case NORM_1: 896 ntype = "1"; 897 lwork = PetscMax(a->locr, a->locc); 898 break; 899 case NORM_FROBENIUS: 900 ntype = "F"; 901 work = &dummy; 902 break; 903 case NORM_INFINITY: 904 ntype = "I"; 905 lwork = PetscMax(a->locr, a->locc); 906 break; 907 default: 908 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 909 } 910 if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 911 *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 912 if (lwork) PetscCall(PetscFree(work)); 913 PetscFunctionReturn(PETSC_SUCCESS); 914 } 915 916 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 917 { 918 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 919 920 PetscFunctionBegin; 921 PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 922 PetscFunctionReturn(PETSC_SUCCESS); 923 } 924 925 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 926 { 927 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 928 PetscInt i, n, nb, isrc, nproc, iproc, *idx; 929 930 PetscFunctionBegin; 931 if (rows) { 932 n = a->locr; 933 nb = a->mb; 934 isrc = a->rsrc; 935 nproc = a->grid->nprow; 936 iproc = a->grid->myrow; 937 PetscCall(PetscMalloc1(n, &idx)); 938 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 939 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 940 } 941 if (cols) { 942 n = a->locc; 943 nb = a->nb; 944 isrc = a->csrc; 945 nproc = a->grid->npcol; 946 iproc = a->grid->mycol; 947 PetscCall(PetscMalloc1(n, &idx)); 948 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 949 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 950 } 951 PetscFunctionReturn(PETSC_SUCCESS); 952 } 953 954 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 955 { 956 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 957 Mat Bmpi; 958 MPI_Comm comm; 959 PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 960 const PetscInt *ranges, *branges, *cwork; 961 const PetscScalar *vwork; 962 PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 963 PetscScalar *barray; 964 PetscBool differ = PETSC_FALSE; 965 PetscMPIInt size; 966 967 PetscFunctionBegin; 968 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 969 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 970 971 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 972 PetscCallMPI(MPI_Comm_size(comm, &size)); 973 PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 974 for (i = 0; i < size; i++) 975 if (ranges[i + 1] != branges[i + 1]) { 976 differ = PETSC_TRUE; 977 break; 978 } 979 } 980 981 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 982 PetscCall(MatCreate(comm, &Bmpi)); 983 m = PETSC_DECIDE; 984 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 985 n = PETSC_DECIDE; 986 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 987 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 988 PetscCall(MatSetType(Bmpi, MATDENSE)); 989 PetscCall(MatSetUp(Bmpi)); 990 991 /* create ScaLAPACK descriptor for B (1d block distribution) */ 992 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 993 PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 994 lld = PetscMax(ldb, 1); /* local leading dimension */ 995 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 996 PetscCheckScaLapackInfo("descinit", info); 997 998 /* redistribute matrix */ 999 PetscCall(MatDenseGetArray(Bmpi, &barray)); 1000 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 1001 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1002 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1003 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1004 1005 /* transfer rows of auxiliary matrix to the final matrix B */ 1006 PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 1007 for (i = rstart; i < rend; i++) { 1008 PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 1009 PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 1010 PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 1011 } 1012 PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 1013 PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 1014 PetscCall(MatDestroy(&Bmpi)); 1015 1016 } else { /* normal cases */ 1017 1018 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1019 else { 1020 PetscCall(MatCreate(comm, &Bmpi)); 1021 m = PETSC_DECIDE; 1022 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1023 n = PETSC_DECIDE; 1024 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1025 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1026 PetscCall(MatSetType(Bmpi, MATDENSE)); 1027 PetscCall(MatSetUp(Bmpi)); 1028 } 1029 1030 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1031 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1032 PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 1033 lld = PetscMax(ldb, 1); /* local leading dimension */ 1034 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1035 PetscCheckScaLapackInfo("descinit", info); 1036 1037 /* redistribute matrix */ 1038 PetscCall(MatDenseGetArray(Bmpi, &barray)); 1039 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 1040 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1041 1042 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1043 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1044 if (reuse == MAT_INPLACE_MATRIX) { 1045 PetscCall(MatHeaderReplace(A, &Bmpi)); 1046 } else *B = Bmpi; 1047 } 1048 PetscFunctionReturn(PETSC_SUCCESS); 1049 } 1050 1051 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1052 { 1053 const PetscInt *ranges; 1054 PetscMPIInt size; 1055 PetscInt i, n; 1056 1057 PetscFunctionBegin; 1058 *correct = PETSC_TRUE; 1059 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1060 if (size > 1) { 1061 PetscCall(PetscLayoutGetRanges(map, &ranges)); 1062 n = ranges[1] - ranges[0]; 1063 for (i = 1; i < size; i++) 1064 if (ranges[i + 1] - ranges[i] != n) break; 1065 *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1066 } 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1071 { 1072 Mat_ScaLAPACK *b; 1073 Mat Bmpi; 1074 MPI_Comm comm; 1075 PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1076 const PetscInt *ranges, *rows, *cols; 1077 PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1078 const PetscScalar *aarray; 1079 IS ir, ic; 1080 PetscInt lda; 1081 PetscBool flg; 1082 1083 PetscFunctionBegin; 1084 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1085 1086 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1087 else { 1088 PetscCall(MatCreate(comm, &Bmpi)); 1089 m = PETSC_DECIDE; 1090 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1091 n = PETSC_DECIDE; 1092 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1093 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1094 PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 1095 PetscCall(MatSetUp(Bmpi)); 1096 } 1097 b = (Mat_ScaLAPACK *)Bmpi->data; 1098 1099 PetscCall(MatDenseGetLDA(A, &lda)); 1100 PetscCall(MatDenseGetArrayRead(A, &aarray)); 1101 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1102 if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1103 if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1104 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1105 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1106 PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 1107 lld = PetscMax(lda, 1); /* local leading dimension */ 1108 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1109 PetscCheckScaLapackInfo("descinit", info); 1110 1111 /* redistribute matrix */ 1112 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1113 Bmpi->nooffprocentries = PETSC_TRUE; 1114 } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1115 PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1116 b->roworiented = PETSC_FALSE; 1117 PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1118 PetscCall(ISGetIndices(ir, &rows)); 1119 PetscCall(ISGetIndices(ic, &cols)); 1120 PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1121 PetscCall(ISRestoreIndices(ir, &rows)); 1122 PetscCall(ISRestoreIndices(ic, &cols)); 1123 PetscCall(ISDestroy(&ic)); 1124 PetscCall(ISDestroy(&ir)); 1125 } 1126 PetscCall(MatDenseRestoreArrayRead(A, &aarray)); 1127 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1128 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1129 if (reuse == MAT_INPLACE_MATRIX) { 1130 PetscCall(MatHeaderReplace(A, &Bmpi)); 1131 } else *B = Bmpi; 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1136 { 1137 Mat mat_scal; 1138 PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1139 const PetscInt *cols; 1140 const PetscScalar *vals; 1141 1142 PetscFunctionBegin; 1143 if (reuse == MAT_REUSE_MATRIX) { 1144 mat_scal = *newmat; 1145 PetscCall(MatZeroEntries(mat_scal)); 1146 } else { 1147 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1148 m = PETSC_DECIDE; 1149 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1150 n = PETSC_DECIDE; 1151 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1152 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1153 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1154 PetscCall(MatSetUp(mat_scal)); 1155 } 1156 for (row = rstart; row < rend; row++) { 1157 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1158 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 1159 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1160 } 1161 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1162 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1163 1164 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1165 else *newmat = mat_scal; 1166 PetscFunctionReturn(PETSC_SUCCESS); 1167 } 1168 1169 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1170 { 1171 Mat mat_scal; 1172 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1173 const PetscInt *cols; 1174 const PetscScalar *vals; 1175 PetscScalar v; 1176 1177 PetscFunctionBegin; 1178 if (reuse == MAT_REUSE_MATRIX) { 1179 mat_scal = *newmat; 1180 PetscCall(MatZeroEntries(mat_scal)); 1181 } else { 1182 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1183 m = PETSC_DECIDE; 1184 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1185 n = PETSC_DECIDE; 1186 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1187 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1188 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1189 PetscCall(MatSetUp(mat_scal)); 1190 } 1191 PetscCall(MatGetRowUpperTriangular(A)); 1192 for (row = rstart; row < rend; row++) { 1193 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1194 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1195 for (j = 0; j < ncols; j++) { /* lower triangular part */ 1196 if (cols[j] == row) continue; 1197 v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 1198 PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1199 } 1200 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1201 } 1202 PetscCall(MatRestoreRowUpperTriangular(A)); 1203 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1204 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1205 1206 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1207 else *newmat = mat_scal; 1208 PetscFunctionReturn(PETSC_SUCCESS); 1209 } 1210 1211 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1212 { 1213 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1214 PetscInt sz = 0; 1215 1216 PetscFunctionBegin; 1217 PetscCall(PetscLayoutSetUp(A->rmap)); 1218 PetscCall(PetscLayoutSetUp(A->cmap)); 1219 if (!a->lld) a->lld = a->locr; 1220 1221 PetscCall(PetscFree(a->loc)); 1222 PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 1223 PetscCall(PetscCalloc1(sz, &a->loc)); 1224 1225 A->preallocated = PETSC_TRUE; 1226 PetscFunctionReturn(PETSC_SUCCESS); 1227 } 1228 1229 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1230 { 1231 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1232 Mat_ScaLAPACK_Grid *grid; 1233 PetscBool flg; 1234 MPI_Comm icomm; 1235 1236 PetscFunctionBegin; 1237 PetscCall(MatStashDestroy_Private(&A->stash)); 1238 PetscCall(PetscFree(a->loc)); 1239 PetscCall(PetscFree(a->pivots)); 1240 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1241 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1242 if (--grid->grid_refct == 0) { 1243 Cblacs_gridexit(grid->ictxt); 1244 Cblacs_gridexit(grid->ictxrow); 1245 Cblacs_gridexit(grid->ictxcol); 1246 PetscCall(PetscFree(grid)); 1247 PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1248 } 1249 PetscCall(PetscCommDestroy(&icomm)); 1250 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 1251 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1252 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 1253 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 1254 PetscCall(PetscFree(A->data)); 1255 PetscFunctionReturn(PETSC_SUCCESS); 1256 } 1257 1258 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1259 { 1260 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1261 PetscBLASInt info = 0; 1262 PetscBool flg; 1263 1264 PetscFunctionBegin; 1265 PetscCall(PetscLayoutSetUp(A->rmap)); 1266 PetscCall(PetscLayoutSetUp(A->cmap)); 1267 1268 /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1269 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1270 PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1271 PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1272 PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1273 1274 /* compute local sizes */ 1275 PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 1276 PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1277 a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1278 a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1279 a->lld = PetscMax(1, a->locr); 1280 1281 /* allocate local array */ 1282 PetscCall(MatScaLAPACKSetPreallocation(A)); 1283 1284 /* set up ScaLAPACK descriptor */ 1285 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1286 PetscCheckScaLapackInfo("descinit", info); 1287 PetscFunctionReturn(PETSC_SUCCESS); 1288 } 1289 1290 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1291 { 1292 PetscInt nstash, reallocs; 1293 1294 PetscFunctionBegin; 1295 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1296 PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 1297 PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 1298 PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 1299 PetscFunctionReturn(PETSC_SUCCESS); 1300 } 1301 1302 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1303 { 1304 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1305 PetscMPIInt n; 1306 PetscInt i, flg, *row, *col; 1307 PetscScalar *val; 1308 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1309 1310 PetscFunctionBegin; 1311 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1312 while (1) { 1313 PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1314 if (!flg) break; 1315 for (i = 0; i < n; i++) { 1316 PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 1317 PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1318 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1319 PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1320 switch (A->insertmode) { 1321 case INSERT_VALUES: 1322 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1323 break; 1324 case ADD_VALUES: 1325 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1326 break; 1327 default: 1328 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1329 } 1330 } 1331 } 1332 PetscCall(MatStashScatterEnd_Private(&A->stash)); 1333 PetscFunctionReturn(PETSC_SUCCESS); 1334 } 1335 1336 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1337 { 1338 Mat Adense, As; 1339 MPI_Comm comm; 1340 1341 PetscFunctionBegin; 1342 PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 1343 PetscCall(MatCreate(comm, &Adense)); 1344 PetscCall(MatSetType(Adense, MATDENSE)); 1345 PetscCall(MatLoad(Adense, viewer)); 1346 PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 1347 PetscCall(MatDestroy(&Adense)); 1348 PetscCall(MatHeaderReplace(newMat, &As)); 1349 PetscFunctionReturn(PETSC_SUCCESS); 1350 } 1351 1352 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1353 0, 1354 0, 1355 MatMult_ScaLAPACK, 1356 /* 4*/ MatMultAdd_ScaLAPACK, 1357 MatMultTranspose_ScaLAPACK, 1358 MatMultTransposeAdd_ScaLAPACK, 1359 MatSolve_ScaLAPACK, 1360 MatSolveAdd_ScaLAPACK, 1361 0, 1362 /*10*/ 0, 1363 MatLUFactor_ScaLAPACK, 1364 MatCholeskyFactor_ScaLAPACK, 1365 0, 1366 MatTranspose_ScaLAPACK, 1367 /*15*/ MatGetInfo_ScaLAPACK, 1368 0, 1369 MatGetDiagonal_ScaLAPACK, 1370 MatDiagonalScale_ScaLAPACK, 1371 MatNorm_ScaLAPACK, 1372 /*20*/ MatAssemblyBegin_ScaLAPACK, 1373 MatAssemblyEnd_ScaLAPACK, 1374 MatSetOption_ScaLAPACK, 1375 MatZeroEntries_ScaLAPACK, 1376 /*24*/ 0, 1377 MatLUFactorSymbolic_ScaLAPACK, 1378 MatLUFactorNumeric_ScaLAPACK, 1379 MatCholeskyFactorSymbolic_ScaLAPACK, 1380 MatCholeskyFactorNumeric_ScaLAPACK, 1381 /*29*/ MatSetUp_ScaLAPACK, 1382 0, 1383 0, 1384 0, 1385 0, 1386 /*34*/ MatDuplicate_ScaLAPACK, 1387 0, 1388 0, 1389 0, 1390 0, 1391 /*39*/ MatAXPY_ScaLAPACK, 1392 0, 1393 0, 1394 0, 1395 MatCopy_ScaLAPACK, 1396 /*44*/ 0, 1397 MatScale_ScaLAPACK, 1398 MatShift_ScaLAPACK, 1399 0, 1400 0, 1401 /*49*/ 0, 1402 0, 1403 0, 1404 0, 1405 0, 1406 /*54*/ 0, 1407 0, 1408 0, 1409 0, 1410 0, 1411 /*59*/ 0, 1412 MatDestroy_ScaLAPACK, 1413 MatView_ScaLAPACK, 1414 0, 1415 0, 1416 /*64*/ 0, 1417 0, 1418 0, 1419 0, 1420 0, 1421 /*69*/ 0, 1422 0, 1423 MatConvert_ScaLAPACK_Dense, 1424 0, 1425 0, 1426 /*74*/ 0, 1427 0, 1428 0, 1429 0, 1430 0, 1431 /*79*/ 0, 1432 0, 1433 0, 1434 0, 1435 MatLoad_ScaLAPACK, 1436 /*84*/ 0, 1437 0, 1438 0, 1439 0, 1440 0, 1441 /*89*/ 0, 1442 0, 1443 MatMatMultNumeric_ScaLAPACK, 1444 0, 1445 0, 1446 /*94*/ 0, 1447 0, 1448 0, 1449 MatMatTransposeMultNumeric_ScaLAPACK, 1450 0, 1451 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1452 0, 1453 0, 1454 MatConjugate_ScaLAPACK, 1455 0, 1456 /*104*/ 0, 1457 0, 1458 0, 1459 0, 1460 0, 1461 /*109*/ MatMatSolve_ScaLAPACK, 1462 0, 1463 0, 1464 0, 1465 MatMissingDiagonal_ScaLAPACK, 1466 /*114*/ 0, 1467 0, 1468 0, 1469 0, 1470 0, 1471 /*119*/ 0, 1472 MatHermitianTranspose_ScaLAPACK, 1473 MatMultHermitianTranspose_ScaLAPACK, 1474 MatMultHermitianTransposeAdd_ScaLAPACK, 1475 0, 1476 /*124*/ 0, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*129*/ 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 /*134*/ 0, 1487 0, 1488 0, 1489 0, 1490 0, 1491 0, 1492 /*140*/ 0, 1493 0, 1494 0, 1495 0, 1496 0, 1497 /*145*/ 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*150*/ 0, 1503 0}; 1504 1505 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1506 { 1507 PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1508 PetscInt size = stash->size, nsends; 1509 PetscInt count, *sindices, **rindices, i, j, l; 1510 PetscScalar **rvalues, *svalues; 1511 MPI_Comm comm = stash->comm; 1512 MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1513 PetscMPIInt *sizes, *nlengths, nreceives; 1514 PetscInt *sp_idx, *sp_idy; 1515 PetscScalar *sp_val; 1516 PetscMatStashSpace space, space_next; 1517 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1518 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1519 1520 PetscFunctionBegin; 1521 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1522 InsertMode addv; 1523 PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 1524 PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1525 mat->insertmode = addv; /* in case this processor had no cache */ 1526 } 1527 1528 bs2 = stash->bs * stash->bs; 1529 1530 /* first count number of contributors to each processor */ 1531 PetscCall(PetscCalloc1(size, &nlengths)); 1532 PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1533 1534 i = j = 0; 1535 space = stash->space_head; 1536 while (space) { 1537 space_next = space->next; 1538 for (l = 0; l < space->local_used; l++) { 1539 PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 1540 PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1541 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1542 j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 1543 nlengths[j]++; 1544 owner[i] = j; 1545 i++; 1546 } 1547 space = space_next; 1548 } 1549 1550 /* Now check what procs get messages - and compute nsends. */ 1551 PetscCall(PetscCalloc1(size, &sizes)); 1552 for (i = 0, nsends = 0; i < size; i++) { 1553 if (nlengths[i]) { 1554 sizes[i] = 1; 1555 nsends++; 1556 } 1557 } 1558 1559 { 1560 PetscMPIInt *onodes, *olengths; 1561 /* Determine the number of messages to expect, their lengths, from from-ids */ 1562 PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 1563 PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1564 /* since clubbing row,col - lengths are multiplied by 2 */ 1565 for (i = 0; i < nreceives; i++) olengths[i] *= 2; 1566 PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1567 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1568 for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 1569 PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 1570 PetscCall(PetscFree(onodes)); 1571 PetscCall(PetscFree(olengths)); 1572 } 1573 1574 /* do sends: 1575 1) starts[i] gives the starting index in svalues for stuff going to 1576 the ith processor 1577 */ 1578 PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 1579 PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 1580 PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1581 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1582 startv[0] = 0; 1583 starti[0] = 0; 1584 for (i = 1; i < size; i++) { 1585 startv[i] = startv[i - 1] + nlengths[i - 1]; 1586 starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1587 } 1588 1589 i = 0; 1590 space = stash->space_head; 1591 while (space) { 1592 space_next = space->next; 1593 sp_idx = space->idx; 1594 sp_idy = space->idy; 1595 sp_val = space->val; 1596 for (l = 0; l < space->local_used; l++) { 1597 j = owner[i]; 1598 if (bs2 == 1) { 1599 svalues[startv[j]] = sp_val[l]; 1600 } else { 1601 PetscInt k; 1602 PetscScalar *buf1, *buf2; 1603 buf1 = svalues + bs2 * startv[j]; 1604 buf2 = space->val + bs2 * l; 1605 for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1606 } 1607 sindices[starti[j]] = sp_idx[l]; 1608 sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1609 startv[j]++; 1610 starti[j]++; 1611 i++; 1612 } 1613 space = space_next; 1614 } 1615 startv[0] = 0; 1616 for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1617 1618 for (i = 0, count = 0; i < size; i++) { 1619 if (sizes[i]) { 1620 PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 1621 PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1622 } 1623 } 1624 #if defined(PETSC_USE_INFO) 1625 PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1626 for (i = 0; i < size; i++) { 1627 if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1628 } 1629 #endif 1630 PetscCall(PetscFree(nlengths)); 1631 PetscCall(PetscFree(owner)); 1632 PetscCall(PetscFree2(startv, starti)); 1633 PetscCall(PetscFree(sizes)); 1634 1635 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1636 PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1637 1638 for (i = 0; i < nreceives; i++) { 1639 recv_waits[2 * i] = recv_waits1[i]; 1640 recv_waits[2 * i + 1] = recv_waits2[i]; 1641 } 1642 stash->recv_waits = recv_waits; 1643 1644 PetscCall(PetscFree(recv_waits1)); 1645 PetscCall(PetscFree(recv_waits2)); 1646 1647 stash->svalues = svalues; 1648 stash->sindices = sindices; 1649 stash->rvalues = rvalues; 1650 stash->rindices = rindices; 1651 stash->send_waits = send_waits; 1652 stash->nsends = nsends; 1653 stash->nrecvs = nreceives; 1654 stash->reproduce_count = 0; 1655 PetscFunctionReturn(PETSC_SUCCESS); 1656 } 1657 1658 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1659 { 1660 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1661 1662 PetscFunctionBegin; 1663 PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1664 PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1665 PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 1666 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1667 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1668 PetscFunctionReturn(PETSC_SUCCESS); 1669 } 1670 1671 /*@ 1672 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1673 the `MATSCALAPACK` matrix 1674 1675 Logically Collective 1676 1677 Input Parameters: 1678 + A - a `MATSCALAPACK` matrix 1679 . mb - the row block size 1680 - nb - the column block size 1681 1682 Level: intermediate 1683 1684 Note: 1685 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1686 1687 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1688 @*/ 1689 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1690 { 1691 PetscFunctionBegin; 1692 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1693 PetscValidLogicalCollectiveInt(A, mb, 2); 1694 PetscValidLogicalCollectiveInt(A, nb, 3); 1695 PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 1696 PetscFunctionReturn(PETSC_SUCCESS); 1697 } 1698 1699 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1700 { 1701 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1702 1703 PetscFunctionBegin; 1704 if (mb) *mb = a->mb; 1705 if (nb) *nb = a->nb; 1706 PetscFunctionReturn(PETSC_SUCCESS); 1707 } 1708 1709 /*@ 1710 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1711 the `MATSCALAPACK` matrix 1712 1713 Not Collective 1714 1715 Input Parameter: 1716 . A - a `MATSCALAPACK` matrix 1717 1718 Output Parameters: 1719 + mb - the row block size 1720 - nb - the column block size 1721 1722 Level: intermediate 1723 1724 Note: 1725 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1726 1727 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1728 @*/ 1729 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1730 { 1731 PetscFunctionBegin; 1732 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1733 PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 1734 PetscFunctionReturn(PETSC_SUCCESS); 1735 } 1736 1737 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1738 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1739 1740 /*MC 1741 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1742 1743 Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1744 1745 Options Database Keys: 1746 + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 1747 . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1748 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1749 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1750 1751 Level: intermediate 1752 1753 Note: 1754 Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 1755 range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 1756 the given rank. 1757 1758 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1759 M*/ 1760 1761 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1762 { 1763 Mat_ScaLAPACK *a; 1764 PetscBool flg, flg1; 1765 Mat_ScaLAPACK_Grid *grid; 1766 MPI_Comm icomm; 1767 PetscBLASInt nprow, npcol, myrow, mycol; 1768 PetscInt optv1, k = 2, array[2] = {0, 0}; 1769 PetscMPIInt size; 1770 1771 PetscFunctionBegin; 1772 A->ops[0] = MatOps_Values; 1773 A->insertmode = NOT_SET_VALUES; 1774 1775 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1776 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1777 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1778 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1779 A->stash.ScatterDestroy = NULL; 1780 1781 PetscCall(PetscNew(&a)); 1782 A->data = (void *)a; 1783 1784 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1785 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1786 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 1787 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1788 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1789 } 1790 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1791 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1792 if (!flg) { 1793 PetscCall(PetscNew(&grid)); 1794 1795 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1796 grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1797 1798 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1799 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1800 if (flg1) { 1801 PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1802 grid->nprow = optv1; 1803 } 1804 PetscOptionsEnd(); 1805 1806 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1807 grid->npcol = size / grid->nprow; 1808 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1809 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1810 grid->ictxt = Csys2blacs_handle(icomm); 1811 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1812 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1813 grid->grid_refct = 1; 1814 grid->nprow = nprow; 1815 grid->npcol = npcol; 1816 grid->myrow = myrow; 1817 grid->mycol = mycol; 1818 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1819 grid->ictxrow = Csys2blacs_handle(icomm); 1820 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1821 grid->ictxcol = Csys2blacs_handle(icomm); 1822 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1823 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1824 1825 } else grid->grid_refct++; 1826 PetscCall(PetscCommDestroy(&icomm)); 1827 a->grid = grid; 1828 a->mb = DEFAULT_BLOCKSIZE; 1829 a->nb = DEFAULT_BLOCKSIZE; 1830 1831 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1832 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1833 if (flg) { 1834 a->mb = array[0]; 1835 a->nb = (k > 1) ? array[1] : a->mb; 1836 } 1837 PetscOptionsEnd(); 1838 1839 a->roworiented = PETSC_TRUE; 1840 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1841 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1842 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1843 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1844 PetscFunctionReturn(PETSC_SUCCESS); 1845 } 1846 1847 /*@C 1848 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1849 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1850 1851 Collective 1852 1853 Input Parameters: 1854 + comm - MPI communicator 1855 . mb - row block size (or `PETSC_DECIDE` to have it set) 1856 . nb - column block size (or `PETSC_DECIDE` to have it set) 1857 . M - number of global rows 1858 . N - number of global columns 1859 . rsrc - coordinate of process that owns the first row of the distributed matrix 1860 - csrc - coordinate of process that owns the first column of the distributed matrix 1861 1862 Output Parameter: 1863 . A - the matrix 1864 1865 Options Database Key: 1866 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1867 1868 Level: intermediate 1869 1870 Notes: 1871 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 1872 1873 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1874 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1875 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1876 1877 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1878 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1879 significance and are thus ignored. The block sizes refer to the values 1880 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1881 1882 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1883 @*/ 1884 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1885 { 1886 Mat_ScaLAPACK *a; 1887 PetscInt m, n; 1888 1889 PetscFunctionBegin; 1890 PetscCall(MatCreate(comm, A)); 1891 PetscCall(MatSetType(*A, MATSCALAPACK)); 1892 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1893 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1894 m = PETSC_DECIDE; 1895 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1896 n = PETSC_DECIDE; 1897 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1898 PetscCall(MatSetSizes(*A, m, n, M, N)); 1899 a = (Mat_ScaLAPACK *)(*A)->data; 1900 PetscCall(PetscBLASIntCast(M, &a->M)); 1901 PetscCall(PetscBLASIntCast(N, &a->N)); 1902 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1903 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1904 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1905 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1906 PetscCall(MatSetUp(*A)); 1907 PetscFunctionReturn(PETSC_SUCCESS); 1908 } 1909