xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision cf53795e253c8c9d9d0fbcef4cffd37d916c4e81)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(0);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(0);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(0);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(0);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     xlld = PetscMax(1, A->rmap->n);
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     xlld = PetscMax(1, lszx);
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200 
201     /* redistribute y from a row of a 2d matrix */
202     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
203 
204   } else { /* non-transpose */
205 
206     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
207     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
208     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
209     xlld = 1;
210     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
211     PetscCheckScaLapackInfo("descinit", info);
212     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
213     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
214     ylld = PetscMax(1, A->rmap->n);
215     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
216     PetscCheckScaLapackInfo("descinit", info);
217 
218     /* allocate 2d vectors */
219     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
220     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
221     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
222     ylld = PetscMax(1, lszy);
223 
224     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
225     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
226     PetscCheckScaLapackInfo("descinit", info);
227     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
228     PetscCheckScaLapackInfo("descinit", info);
229 
230     /* redistribute x as a row of a 2d matrix */
231     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
232 
233     /* redistribute y as a column of a 2d matrix */
234     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
235 
236     /* call PBLAS subroutine */
237     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
238 
239     /* redistribute y from a column of a 2d matrix */
240     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
241   }
242   PetscCall(PetscFree2(x2d, y2d));
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
247 {
248   const PetscScalar *xarray;
249   PetscScalar       *yarray;
250 
251   PetscFunctionBegin;
252   PetscCall(VecGetArrayRead(x, &xarray));
253   PetscCall(VecGetArray(y, &yarray));
254   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
255   PetscCall(VecRestoreArrayRead(x, &xarray));
256   PetscCall(VecRestoreArray(y, &yarray));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
261 {
262   const PetscScalar *xarray;
263   PetscScalar       *yarray;
264 
265   PetscFunctionBegin;
266   PetscCall(VecGetArrayRead(x, &xarray));
267   PetscCall(VecGetArray(y, &yarray));
268   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
269   PetscCall(VecRestoreArrayRead(x, &xarray));
270   PetscCall(VecRestoreArray(y, &yarray));
271   PetscFunctionReturn(0);
272 }
273 
274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
275 {
276   const PetscScalar *xarray;
277   PetscScalar       *zarray;
278 
279   PetscFunctionBegin;
280   if (y != z) PetscCall(VecCopy(y, z));
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArray(z, &zarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArray(z, &zarray));
286   PetscFunctionReturn(0);
287 }
288 
289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(0);
302 }
303 
304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
305 {
306   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
307   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
308   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
309   PetscScalar    sone = 1.0, zero = 0.0;
310   PetscBLASInt   one = 1;
311 
312   PetscFunctionBegin;
313   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
314   C->assembled = PETSC_TRUE;
315   PetscFunctionReturn(0);
316 }
317 
318 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
319 {
320   PetscFunctionBegin;
321   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
322   PetscCall(MatSetType(C, MATSCALAPACK));
323   PetscCall(MatSetUp(C));
324   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
325   PetscFunctionReturn(0);
326 }
327 
328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
329 {
330   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
331   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
332   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
333   PetscScalar    sone = 1.0, zero = 0.0;
334   PetscBLASInt   one = 1;
335 
336   PetscFunctionBegin;
337   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
338   C->assembled = PETSC_TRUE;
339   PetscFunctionReturn(0);
340 }
341 
342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
343 {
344   PetscFunctionBegin;
345   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
346   PetscCall(MatSetType(C, MATSCALAPACK));
347   PetscCall(MatSetUp(C));
348   PetscFunctionReturn(0);
349 }
350 
351 /* --------------------------------------- */
352 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
353 {
354   PetscFunctionBegin;
355   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
356   C->ops->productsymbolic = MatProductSymbolic_AB;
357   PetscFunctionReturn(0);
358 }
359 
360 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
361 {
362   PetscFunctionBegin;
363   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
364   C->ops->productsymbolic          = MatProductSymbolic_ABt;
365   PetscFunctionReturn(0);
366 }
367 
368 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
369 {
370   Mat_Product *product = C->product;
371 
372   PetscFunctionBegin;
373   switch (product->type) {
374   case MATPRODUCT_AB:
375     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
376     break;
377   case MATPRODUCT_ABt:
378     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
379     break;
380   default:
381     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
382   }
383   PetscFunctionReturn(0);
384 }
385 /* --------------------------------------- */
386 
387 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
388 {
389   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
390   PetscScalar    *darray, *d2d, v;
391   const PetscInt *ranges;
392   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
393 
394   PetscFunctionBegin;
395   PetscCall(VecGetArray(D, &darray));
396 
397   if (A->rmap->N <= A->cmap->N) { /* row version */
398 
399     /* create ScaLAPACK descriptor for vector (1d block distribution) */
400     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
401     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
402     dlld = PetscMax(1, A->rmap->n);
403     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
404     PetscCheckScaLapackInfo("descinit", info);
405 
406     /* allocate 2d vector */
407     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
408     PetscCall(PetscCalloc1(lszd, &d2d));
409     dlld = PetscMax(1, lszd);
410 
411     /* create ScaLAPACK descriptor for vector (2d block distribution) */
412     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
413     PetscCheckScaLapackInfo("descinit", info);
414 
415     /* collect diagonal */
416     for (j = 1; j <= a->M; j++) {
417       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
418       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
419     }
420 
421     /* redistribute d from a column of a 2d matrix */
422     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
423     PetscCall(PetscFree(d2d));
424 
425   } else { /* column version */
426 
427     /* create ScaLAPACK descriptor for vector (1d block distribution) */
428     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
429     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
430     dlld = 1;
431     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
432     PetscCheckScaLapackInfo("descinit", info);
433 
434     /* allocate 2d vector */
435     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
436     PetscCall(PetscCalloc1(lszd, &d2d));
437 
438     /* create ScaLAPACK descriptor for vector (2d block distribution) */
439     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
440     PetscCheckScaLapackInfo("descinit", info);
441 
442     /* collect diagonal */
443     for (j = 1; j <= a->N; j++) {
444       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
445       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
446     }
447 
448     /* redistribute d from a row of a 2d matrix */
449     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
450     PetscCall(PetscFree(d2d));
451   }
452 
453   PetscCall(VecRestoreArray(D, &darray));
454   PetscCall(VecAssemblyBegin(D));
455   PetscCall(VecAssemblyEnd(D));
456   PetscFunctionReturn(0);
457 }
458 
459 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
460 {
461   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
462   const PetscScalar *d;
463   const PetscInt    *ranges;
464   PetscScalar       *d2d;
465   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
466 
467   PetscFunctionBegin;
468   if (R) {
469     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* redistribute d to a row of a 2d matrix */
486     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
487 
488     /* broadcast along process columns */
489     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
490     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
491 
492     /* local scaling */
493     for (j = 0; j < a->locc; j++)
494       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
495 
496     PetscCall(PetscFree(d2d));
497     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
498   }
499   if (L) {
500     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
501     /* create ScaLAPACK descriptor for vector (1d block distribution) */
502     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
503     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
504     dlld = PetscMax(1, A->rmap->n);
505     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
506     PetscCheckScaLapackInfo("descinit", info);
507 
508     /* allocate 2d vector */
509     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
510     PetscCall(PetscCalloc1(lszd, &d2d));
511     dlld = PetscMax(1, lszd);
512 
513     /* create ScaLAPACK descriptor for vector (2d block distribution) */
514     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
515     PetscCheckScaLapackInfo("descinit", info);
516 
517     /* redistribute d to a column of a 2d matrix */
518     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
519 
520     /* broadcast along process rows */
521     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
522     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
523 
524     /* local scaling */
525     for (i = 0; i < a->locr; i++)
526       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
527 
528     PetscCall(PetscFree(d2d));
529     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
535 {
536   PetscFunctionBegin;
537   *missing = PETSC_FALSE;
538   PetscFunctionReturn(0);
539 }
540 
541 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
542 {
543   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
544   PetscBLASInt   n, one = 1;
545 
546   PetscFunctionBegin;
547   n = x->lld * x->locc;
548   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
553 {
554   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
555   PetscBLASInt   i, n;
556   PetscScalar    v;
557 
558   PetscFunctionBegin;
559   n = PetscMin(x->M, x->N);
560   for (i = 1; i <= n; i++) {
561     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
562     v += alpha;
563     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
569 {
570   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
571   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
572   PetscBLASInt   one  = 1;
573   PetscScalar    beta = 1.0;
574 
575   PetscFunctionBegin;
576   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
577   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
578   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
579   PetscFunctionReturn(0);
580 }
581 
582 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
583 {
584   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
585   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
586 
587   PetscFunctionBegin;
588   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
589   PetscCall(PetscObjectStateIncrease((PetscObject)B));
590   PetscFunctionReturn(0);
591 }
592 
593 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
594 {
595   Mat            Bs;
596   MPI_Comm       comm;
597   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
598 
599   PetscFunctionBegin;
600   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
601   PetscCall(MatCreate(comm, &Bs));
602   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
603   PetscCall(MatSetType(Bs, MATSCALAPACK));
604   b       = (Mat_ScaLAPACK *)Bs->data;
605   b->M    = a->M;
606   b->N    = a->N;
607   b->mb   = a->mb;
608   b->nb   = a->nb;
609   b->rsrc = a->rsrc;
610   b->csrc = a->csrc;
611   PetscCall(MatSetUp(Bs));
612   *B = Bs;
613   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
614   Bs->assembled = PETSC_TRUE;
615   PetscFunctionReturn(0);
616 }
617 
618 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
619 {
620   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
621   Mat            Bs   = *B;
622   PetscBLASInt   one  = 1;
623   PetscScalar    sone = 1.0, zero = 0.0;
624 #if defined(PETSC_USE_COMPLEX)
625   PetscInt i, j;
626 #endif
627 
628   PetscFunctionBegin;
629   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
630   if (reuse == MAT_INITIAL_MATRIX) {
631     PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
632     *B = Bs;
633   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
634   b = (Mat_ScaLAPACK *)Bs->data;
635   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
636 #if defined(PETSC_USE_COMPLEX)
637   /* undo conjugation */
638   for (i = 0; i < b->locr; i++)
639     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
640 #endif
641   Bs->assembled = PETSC_TRUE;
642   PetscFunctionReturn(0);
643 }
644 
645 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
646 {
647   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
648   PetscInt       i, j;
649 
650   PetscFunctionBegin;
651   for (i = 0; i < a->locr; i++)
652     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
653   PetscFunctionReturn(0);
654 }
655 
656 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
657 {
658   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
659   Mat            Bs   = *B;
660   PetscBLASInt   one  = 1;
661   PetscScalar    sone = 1.0, zero = 0.0;
662 
663   PetscFunctionBegin;
664   if (reuse == MAT_INITIAL_MATRIX) {
665     PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
666     *B = Bs;
667   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
668   b = (Mat_ScaLAPACK *)Bs->data;
669   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
670   Bs->assembled = PETSC_TRUE;
671   PetscFunctionReturn(0);
672 }
673 
674 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
675 {
676   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
677   PetscScalar    *x, *x2d;
678   const PetscInt *ranges;
679   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
680 
681   PetscFunctionBegin;
682   PetscCall(VecCopy(B, X));
683   PetscCall(VecGetArray(X, &x));
684 
685   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
686   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
687   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
688   xlld = PetscMax(1, A->rmap->n);
689   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
690   PetscCheckScaLapackInfo("descinit", info);
691 
692   /* allocate 2d vector */
693   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
694   PetscCall(PetscMalloc1(lszx, &x2d));
695   xlld = PetscMax(1, lszx);
696 
697   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
698   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
699   PetscCheckScaLapackInfo("descinit", info);
700 
701   /* redistribute x as a column of a 2d matrix */
702   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
703 
704   /* call ScaLAPACK subroutine */
705   switch (A->factortype) {
706   case MAT_FACTOR_LU:
707     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
708     PetscCheckScaLapackInfo("getrs", info);
709     break;
710   case MAT_FACTOR_CHOLESKY:
711     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
712     PetscCheckScaLapackInfo("potrs", info);
713     break;
714   default:
715     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
716   }
717 
718   /* redistribute x from a column of a 2d matrix */
719   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
720 
721   PetscCall(PetscFree(x2d));
722   PetscCall(VecRestoreArray(X, &x));
723   PetscFunctionReturn(0);
724 }
725 
726 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
727 {
728   PetscFunctionBegin;
729   PetscCall(MatSolve_ScaLAPACK(A, B, X));
730   PetscCall(VecAXPY(X, 1, Y));
731   PetscFunctionReturn(0);
732 }
733 
734 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
735 {
736   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
737   PetscBool      flg1, flg2;
738   PetscBLASInt   one = 1, info;
739 
740   PetscFunctionBegin;
741   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
742   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
743   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
744   MatScaLAPACKCheckDistribution(B, 1, X, 2);
745   b = (Mat_ScaLAPACK *)B->data;
746   x = (Mat_ScaLAPACK *)X->data;
747   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
748 
749   switch (A->factortype) {
750   case MAT_FACTOR_LU:
751     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
752     PetscCheckScaLapackInfo("getrs", info);
753     break;
754   case MAT_FACTOR_CHOLESKY:
755     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
756     PetscCheckScaLapackInfo("potrs", info);
757     break;
758   default:
759     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
765 {
766   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
767   PetscBLASInt   one = 1, info;
768 
769   PetscFunctionBegin;
770   if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); }
771   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
772   PetscCheckScaLapackInfo("getrf", info);
773   A->factortype = MAT_FACTOR_LU;
774   A->assembled  = PETSC_TRUE;
775 
776   PetscCall(PetscFree(A->solvertype));
777   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
778   PetscFunctionReturn(0);
779 }
780 
781 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
782 {
783   PetscFunctionBegin;
784   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
785   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
786   PetscFunctionReturn(0);
787 }
788 
789 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
790 {
791   PetscFunctionBegin;
792   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
793   PetscFunctionReturn(0);
794 }
795 
796 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
797 {
798   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
799   PetscBLASInt   one = 1, info;
800 
801   PetscFunctionBegin;
802   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
803   PetscCheckScaLapackInfo("potrf", info);
804   A->factortype = MAT_FACTOR_CHOLESKY;
805   A->assembled  = PETSC_TRUE;
806 
807   PetscCall(PetscFree(A->solvertype));
808   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
809   PetscFunctionReturn(0);
810 }
811 
812 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
813 {
814   PetscFunctionBegin;
815   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
816   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
817   PetscFunctionReturn(0);
818 }
819 
820 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
821 {
822   PetscFunctionBegin;
823   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
824   PetscFunctionReturn(0);
825 }
826 
827 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
828 {
829   PetscFunctionBegin;
830   *type = MATSOLVERSCALAPACK;
831   PetscFunctionReturn(0);
832 }
833 
834 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
835 {
836   Mat            B;
837   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
838 
839   PetscFunctionBegin;
840   /* Create the factorization matrix */
841   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
842   B->trivialsymbolic = PETSC_TRUE;
843   B->factortype      = ftype;
844   PetscCall(PetscFree(B->solvertype));
845   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
846 
847   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
848   *F = B;
849   PetscFunctionReturn(0);
850 }
851 
852 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
853 {
854   PetscFunctionBegin;
855   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
856   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
857   PetscFunctionReturn(0);
858 }
859 
860 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
861 {
862   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
863   PetscBLASInt   one = 1, lwork = 0;
864   const char    *ntype;
865   PetscScalar   *work = NULL, dummy;
866 
867   PetscFunctionBegin;
868   switch (type) {
869   case NORM_1:
870     ntype = "1";
871     lwork = PetscMax(a->locr, a->locc);
872     break;
873   case NORM_FROBENIUS:
874     ntype = "F";
875     work  = &dummy;
876     break;
877   case NORM_INFINITY:
878     ntype = "I";
879     lwork = PetscMax(a->locr, a->locc);
880     break;
881   default:
882     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
883   }
884   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
885   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
886   if (lwork) PetscCall(PetscFree(work));
887   PetscFunctionReturn(0);
888 }
889 
890 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
891 {
892   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
893 
894   PetscFunctionBegin;
895   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
896   PetscFunctionReturn(0);
897 }
898 
899 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
900 {
901   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
902   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
903 
904   PetscFunctionBegin;
905   if (rows) {
906     n     = a->locr;
907     nb    = a->mb;
908     isrc  = a->rsrc;
909     nproc = a->grid->nprow;
910     iproc = a->grid->myrow;
911     PetscCall(PetscMalloc1(n, &idx));
912     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
913     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
914   }
915   if (cols) {
916     n     = a->locc;
917     nb    = a->nb;
918     isrc  = a->csrc;
919     nproc = a->grid->npcol;
920     iproc = a->grid->mycol;
921     PetscCall(PetscMalloc1(n, &idx));
922     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
923     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
929 {
930   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
931   Mat                Bmpi;
932   MPI_Comm           comm;
933   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
934   const PetscInt    *ranges, *branges, *cwork;
935   const PetscScalar *vwork;
936   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
937   PetscScalar       *barray;
938   PetscBool          differ = PETSC_FALSE;
939   PetscMPIInt        size;
940 
941   PetscFunctionBegin;
942   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
943   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
944 
945   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
946     PetscCallMPI(MPI_Comm_size(comm, &size));
947     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
948     for (i = 0; i < size; i++)
949       if (ranges[i + 1] != branges[i + 1]) {
950         differ = PETSC_TRUE;
951         break;
952       }
953   }
954 
955   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
956     PetscCall(MatCreate(comm, &Bmpi));
957     m = PETSC_DECIDE;
958     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
959     n = PETSC_DECIDE;
960     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
961     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
962     PetscCall(MatSetType(Bmpi, MATDENSE));
963     PetscCall(MatSetUp(Bmpi));
964 
965     /* create ScaLAPACK descriptor for B (1d block distribution) */
966     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
967     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
968     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
969     PetscCheckScaLapackInfo("descinit", info);
970 
971     /* redistribute matrix */
972     PetscCall(MatDenseGetArray(Bmpi, &barray));
973     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
974     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
975     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
976     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
977 
978     /* transfer rows of auxiliary matrix to the final matrix B */
979     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
980     for (i = rstart; i < rend; i++) {
981       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
982       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
983       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
984     }
985     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
986     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
987     PetscCall(MatDestroy(&Bmpi));
988 
989   } else { /* normal cases */
990 
991     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
992     else {
993       PetscCall(MatCreate(comm, &Bmpi));
994       m = PETSC_DECIDE;
995       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
996       n = PETSC_DECIDE;
997       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
998       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
999       PetscCall(MatSetType(Bmpi, MATDENSE));
1000       PetscCall(MatSetUp(Bmpi));
1001     }
1002 
1003     /* create ScaLAPACK descriptor for B (1d block distribution) */
1004     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1005     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
1006     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1007     PetscCheckScaLapackInfo("descinit", info);
1008 
1009     /* redistribute matrix */
1010     PetscCall(MatDenseGetArray(Bmpi, &barray));
1011     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1012     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1013 
1014     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1015     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1016     if (reuse == MAT_INPLACE_MATRIX) {
1017       PetscCall(MatHeaderReplace(A, &Bmpi));
1018     } else *B = Bmpi;
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1024 {
1025   const PetscInt *ranges;
1026   PetscMPIInt     size;
1027   PetscInt        i, n;
1028 
1029   PetscFunctionBegin;
1030   *correct = PETSC_TRUE;
1031   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1032   if (size > 1) {
1033     PetscCall(PetscLayoutGetRanges(map, &ranges));
1034     n = ranges[1] - ranges[0];
1035     for (i = 1; i < size; i++)
1036       if (ranges[i + 1] - ranges[i] != n) break;
1037     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1043 {
1044   Mat_ScaLAPACK  *b;
1045   Mat             Bmpi;
1046   MPI_Comm        comm;
1047   PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
1048   const PetscInt *ranges, *rows, *cols;
1049   PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
1050   PetscScalar    *aarray;
1051   IS              ir, ic;
1052   PetscInt        lda;
1053   PetscBool       flg;
1054 
1055   PetscFunctionBegin;
1056   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1057 
1058   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1059   else {
1060     PetscCall(MatCreate(comm, &Bmpi));
1061     m = PETSC_DECIDE;
1062     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1063     n = PETSC_DECIDE;
1064     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1065     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1066     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1067     PetscCall(MatSetUp(Bmpi));
1068   }
1069   b = (Mat_ScaLAPACK *)Bmpi->data;
1070 
1071   PetscCall(MatDenseGetLDA(A, &lda));
1072   PetscCall(MatDenseGetArray(A, &aarray));
1073   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1074   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1075   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1076     /* create ScaLAPACK descriptor for A (1d block distribution) */
1077     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1078     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1079     lld = PetscMax(lda, 1);                       /* local leading dimension */
1080     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1081     PetscCheckScaLapackInfo("descinit", info);
1082 
1083     /* redistribute matrix */
1084     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1085     Bmpi->nooffprocentries = PETSC_TRUE;
1086   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1087     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1088     b->roworiented = PETSC_FALSE;
1089     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1090     PetscCall(ISGetIndices(ir, &rows));
1091     PetscCall(ISGetIndices(ic, &cols));
1092     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1093     PetscCall(ISRestoreIndices(ir, &rows));
1094     PetscCall(ISRestoreIndices(ic, &cols));
1095     PetscCall(ISDestroy(&ic));
1096     PetscCall(ISDestroy(&ir));
1097   }
1098   PetscCall(MatDenseRestoreArray(A, &aarray));
1099   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1100   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1101   if (reuse == MAT_INPLACE_MATRIX) {
1102     PetscCall(MatHeaderReplace(A, &Bmpi));
1103   } else *B = Bmpi;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1108 {
1109   Mat                mat_scal;
1110   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1111   const PetscInt    *cols;
1112   const PetscScalar *vals;
1113 
1114   PetscFunctionBegin;
1115   if (reuse == MAT_REUSE_MATRIX) {
1116     mat_scal = *newmat;
1117     PetscCall(MatZeroEntries(mat_scal));
1118   } else {
1119     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1120     m = PETSC_DECIDE;
1121     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1122     n = PETSC_DECIDE;
1123     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1124     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1125     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1126     PetscCall(MatSetUp(mat_scal));
1127   }
1128   for (row = rstart; row < rend; row++) {
1129     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1130     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1131     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1132   }
1133   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1134   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1135 
1136   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1137   else *newmat = mat_scal;
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1142 {
1143   Mat                mat_scal;
1144   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1145   const PetscInt    *cols;
1146   const PetscScalar *vals;
1147   PetscScalar        v;
1148 
1149   PetscFunctionBegin;
1150   if (reuse == MAT_REUSE_MATRIX) {
1151     mat_scal = *newmat;
1152     PetscCall(MatZeroEntries(mat_scal));
1153   } else {
1154     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1155     m = PETSC_DECIDE;
1156     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1157     n = PETSC_DECIDE;
1158     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1159     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1160     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1161     PetscCall(MatSetUp(mat_scal));
1162   }
1163   PetscCall(MatGetRowUpperTriangular(A));
1164   for (row = rstart; row < rend; row++) {
1165     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1166     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1167     for (j = 0; j < ncols; j++) { /* lower triangular part */
1168       if (cols[j] == row) continue;
1169       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1170       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1171     }
1172     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1173   }
1174   PetscCall(MatRestoreRowUpperTriangular(A));
1175   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1176   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1177 
1178   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1179   else *newmat = mat_scal;
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1184 {
1185   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1186   PetscInt       sz = 0;
1187 
1188   PetscFunctionBegin;
1189   PetscCall(PetscLayoutSetUp(A->rmap));
1190   PetscCall(PetscLayoutSetUp(A->cmap));
1191   if (!a->lld) a->lld = a->locr;
1192 
1193   PetscCall(PetscFree(a->loc));
1194   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1195   PetscCall(PetscCalloc1(sz, &a->loc));
1196 
1197   A->preallocated = PETSC_TRUE;
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1202 {
1203   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1204   Mat_ScaLAPACK_Grid *grid;
1205   PetscBool           flg;
1206   MPI_Comm            icomm;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(MatStashDestroy_Private(&A->stash));
1210   PetscCall(PetscFree(a->loc));
1211   PetscCall(PetscFree(a->pivots));
1212   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1213   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1214   if (--grid->grid_refct == 0) {
1215     Cblacs_gridexit(grid->ictxt);
1216     Cblacs_gridexit(grid->ictxrow);
1217     Cblacs_gridexit(grid->ictxcol);
1218     PetscCall(PetscFree(grid));
1219     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1220   }
1221   PetscCall(PetscCommDestroy(&icomm));
1222   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1223   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1224   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1225   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1226   PetscCall(PetscFree(A->data));
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1231 {
1232   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1233   PetscBLASInt   info = 0;
1234   PetscBool      flg;
1235 
1236   PetscFunctionBegin;
1237   PetscCall(PetscLayoutSetUp(A->rmap));
1238   PetscCall(PetscLayoutSetUp(A->cmap));
1239 
1240   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1241   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1242   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1243   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1244   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1245 
1246   /* compute local sizes */
1247   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1248   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1249   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1250   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1251   a->lld  = PetscMax(1, a->locr);
1252 
1253   /* allocate local array */
1254   PetscCall(MatScaLAPACKSetPreallocation(A));
1255 
1256   /* set up ScaLAPACK descriptor */
1257   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1258   PetscCheckScaLapackInfo("descinit", info);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1263 {
1264   PetscInt nstash, reallocs;
1265 
1266   PetscFunctionBegin;
1267   if (A->nooffprocentries) PetscFunctionReturn(0);
1268   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1269   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1270   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1275 {
1276   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1277   PetscMPIInt    n;
1278   PetscInt       i, flg, *row, *col;
1279   PetscScalar   *val;
1280   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1281 
1282   PetscFunctionBegin;
1283   if (A->nooffprocentries) PetscFunctionReturn(0);
1284   while (1) {
1285     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1286     if (!flg) break;
1287     for (i = 0; i < n; i++) {
1288       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1289       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1290       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1291       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1292       switch (A->insertmode) {
1293       case INSERT_VALUES:
1294         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1295         break;
1296       case ADD_VALUES:
1297         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1298         break;
1299       default:
1300         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1301       }
1302     }
1303   }
1304   PetscCall(MatStashScatterEnd_Private(&A->stash));
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1309 {
1310   Mat      Adense, As;
1311   MPI_Comm comm;
1312 
1313   PetscFunctionBegin;
1314   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1315   PetscCall(MatCreate(comm, &Adense));
1316   PetscCall(MatSetType(Adense, MATDENSE));
1317   PetscCall(MatLoad(Adense, viewer));
1318   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1319   PetscCall(MatDestroy(&Adense));
1320   PetscCall(MatHeaderReplace(newMat, &As));
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 /* -------------------------------------------------------------------*/
1325 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1326                                        0,
1327                                        0,
1328                                        MatMult_ScaLAPACK,
1329                                        /* 4*/ MatMultAdd_ScaLAPACK,
1330                                        MatMultTranspose_ScaLAPACK,
1331                                        MatMultTransposeAdd_ScaLAPACK,
1332                                        MatSolve_ScaLAPACK,
1333                                        MatSolveAdd_ScaLAPACK,
1334                                        0,
1335                                        /*10*/ 0,
1336                                        MatLUFactor_ScaLAPACK,
1337                                        MatCholeskyFactor_ScaLAPACK,
1338                                        0,
1339                                        MatTranspose_ScaLAPACK,
1340                                        /*15*/ MatGetInfo_ScaLAPACK,
1341                                        0,
1342                                        MatGetDiagonal_ScaLAPACK,
1343                                        MatDiagonalScale_ScaLAPACK,
1344                                        MatNorm_ScaLAPACK,
1345                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1346                                        MatAssemblyEnd_ScaLAPACK,
1347                                        MatSetOption_ScaLAPACK,
1348                                        MatZeroEntries_ScaLAPACK,
1349                                        /*24*/ 0,
1350                                        MatLUFactorSymbolic_ScaLAPACK,
1351                                        MatLUFactorNumeric_ScaLAPACK,
1352                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1353                                        MatCholeskyFactorNumeric_ScaLAPACK,
1354                                        /*29*/ MatSetUp_ScaLAPACK,
1355                                        0,
1356                                        0,
1357                                        0,
1358                                        0,
1359                                        /*34*/ MatDuplicate_ScaLAPACK,
1360                                        0,
1361                                        0,
1362                                        0,
1363                                        0,
1364                                        /*39*/ MatAXPY_ScaLAPACK,
1365                                        0,
1366                                        0,
1367                                        0,
1368                                        MatCopy_ScaLAPACK,
1369                                        /*44*/ 0,
1370                                        MatScale_ScaLAPACK,
1371                                        MatShift_ScaLAPACK,
1372                                        0,
1373                                        0,
1374                                        /*49*/ 0,
1375                                        0,
1376                                        0,
1377                                        0,
1378                                        0,
1379                                        /*54*/ 0,
1380                                        0,
1381                                        0,
1382                                        0,
1383                                        0,
1384                                        /*59*/ 0,
1385                                        MatDestroy_ScaLAPACK,
1386                                        MatView_ScaLAPACK,
1387                                        0,
1388                                        0,
1389                                        /*64*/ 0,
1390                                        0,
1391                                        0,
1392                                        0,
1393                                        0,
1394                                        /*69*/ 0,
1395                                        0,
1396                                        MatConvert_ScaLAPACK_Dense,
1397                                        0,
1398                                        0,
1399                                        /*74*/ 0,
1400                                        0,
1401                                        0,
1402                                        0,
1403                                        0,
1404                                        /*79*/ 0,
1405                                        0,
1406                                        0,
1407                                        0,
1408                                        MatLoad_ScaLAPACK,
1409                                        /*84*/ 0,
1410                                        0,
1411                                        0,
1412                                        0,
1413                                        0,
1414                                        /*89*/ 0,
1415                                        0,
1416                                        MatMatMultNumeric_ScaLAPACK,
1417                                        0,
1418                                        0,
1419                                        /*94*/ 0,
1420                                        0,
1421                                        0,
1422                                        MatMatTransposeMultNumeric_ScaLAPACK,
1423                                        0,
1424                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1425                                        0,
1426                                        0,
1427                                        MatConjugate_ScaLAPACK,
1428                                        0,
1429                                        /*104*/ 0,
1430                                        0,
1431                                        0,
1432                                        0,
1433                                        0,
1434                                        /*109*/ MatMatSolve_ScaLAPACK,
1435                                        0,
1436                                        0,
1437                                        0,
1438                                        MatMissingDiagonal_ScaLAPACK,
1439                                        /*114*/ 0,
1440                                        0,
1441                                        0,
1442                                        0,
1443                                        0,
1444                                        /*119*/ 0,
1445                                        MatHermitianTranspose_ScaLAPACK,
1446                                        0,
1447                                        0,
1448                                        0,
1449                                        /*124*/ 0,
1450                                        0,
1451                                        0,
1452                                        0,
1453                                        0,
1454                                        /*129*/ 0,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                        0,
1459                                        /*134*/ 0,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                        0,
1465                                        /*140*/ 0,
1466                                        0,
1467                                        0,
1468                                        0,
1469                                        0,
1470                                        /*145*/ 0,
1471                                        0,
1472                                        0,
1473                                        0,
1474                                        0,
1475                                        /*150*/ 0};
1476 
1477 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1478 {
1479   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1480   PetscInt           size = stash->size, nsends;
1481   PetscInt           count, *sindices, **rindices, i, j, l;
1482   PetscScalar      **rvalues, *svalues;
1483   MPI_Comm           comm = stash->comm;
1484   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1485   PetscMPIInt       *sizes, *nlengths, nreceives;
1486   PetscInt          *sp_idx, *sp_idy;
1487   PetscScalar       *sp_val;
1488   PetscMatStashSpace space, space_next;
1489   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1490   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1491 
1492   PetscFunctionBegin;
1493   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1494     InsertMode addv;
1495     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1496     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1497     mat->insertmode = addv; /* in case this processor had no cache */
1498   }
1499 
1500   bs2 = stash->bs * stash->bs;
1501 
1502   /*  first count number of contributors to each processor */
1503   PetscCall(PetscCalloc1(size, &nlengths));
1504   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1505 
1506   i = j = 0;
1507   space = stash->space_head;
1508   while (space) {
1509     space_next = space->next;
1510     for (l = 0; l < space->local_used; l++) {
1511       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1512       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1513       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1514       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1515       nlengths[j]++;
1516       owner[i] = j;
1517       i++;
1518     }
1519     space = space_next;
1520   }
1521 
1522   /* Now check what procs get messages - and compute nsends. */
1523   PetscCall(PetscCalloc1(size, &sizes));
1524   for (i = 0, nsends = 0; i < size; i++) {
1525     if (nlengths[i]) {
1526       sizes[i] = 1;
1527       nsends++;
1528     }
1529   }
1530 
1531   {
1532     PetscMPIInt *onodes, *olengths;
1533     /* Determine the number of messages to expect, their lengths, from from-ids */
1534     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1535     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1536     /* since clubbing row,col - lengths are multiplied by 2 */
1537     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1538     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1539     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1540     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1541     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1542     PetscCall(PetscFree(onodes));
1543     PetscCall(PetscFree(olengths));
1544   }
1545 
1546   /* do sends:
1547       1) starts[i] gives the starting index in svalues for stuff going to
1548          the ith processor
1549   */
1550   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1551   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1552   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1553   /* use 2 sends the first with all_a, the next with all_i and all_j */
1554   startv[0] = 0;
1555   starti[0] = 0;
1556   for (i = 1; i < size; i++) {
1557     startv[i] = startv[i - 1] + nlengths[i - 1];
1558     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1559   }
1560 
1561   i     = 0;
1562   space = stash->space_head;
1563   while (space) {
1564     space_next = space->next;
1565     sp_idx     = space->idx;
1566     sp_idy     = space->idy;
1567     sp_val     = space->val;
1568     for (l = 0; l < space->local_used; l++) {
1569       j = owner[i];
1570       if (bs2 == 1) {
1571         svalues[startv[j]] = sp_val[l];
1572       } else {
1573         PetscInt     k;
1574         PetscScalar *buf1, *buf2;
1575         buf1 = svalues + bs2 * startv[j];
1576         buf2 = space->val + bs2 * l;
1577         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1578       }
1579       sindices[starti[j]]               = sp_idx[l];
1580       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1581       startv[j]++;
1582       starti[j]++;
1583       i++;
1584     }
1585     space = space_next;
1586   }
1587   startv[0] = 0;
1588   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1589 
1590   for (i = 0, count = 0; i < size; i++) {
1591     if (sizes[i]) {
1592       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1593       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1594     }
1595   }
1596 #if defined(PETSC_USE_INFO)
1597   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1598   for (i = 0; i < size; i++) {
1599     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1600   }
1601 #endif
1602   PetscCall(PetscFree(nlengths));
1603   PetscCall(PetscFree(owner));
1604   PetscCall(PetscFree2(startv, starti));
1605   PetscCall(PetscFree(sizes));
1606 
1607   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1608   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1609 
1610   for (i = 0; i < nreceives; i++) {
1611     recv_waits[2 * i]     = recv_waits1[i];
1612     recv_waits[2 * i + 1] = recv_waits2[i];
1613   }
1614   stash->recv_waits = recv_waits;
1615 
1616   PetscCall(PetscFree(recv_waits1));
1617   PetscCall(PetscFree(recv_waits2));
1618 
1619   stash->svalues         = svalues;
1620   stash->sindices        = sindices;
1621   stash->rvalues         = rvalues;
1622   stash->rindices        = rindices;
1623   stash->send_waits      = send_waits;
1624   stash->nsends          = nsends;
1625   stash->nrecvs          = nreceives;
1626   stash->reproduce_count = 0;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1631 {
1632   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1633 
1634   PetscFunctionBegin;
1635   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1636   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1637   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1638   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1639   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 /*@
1644    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1645    the `MATSCALAPACK` matrix
1646 
1647    Logically Collective
1648 
1649    Input Parameters:
1650 +  A  - a `MATSCALAPACK` matrix
1651 .  mb - the row block size
1652 -  nb - the column block size
1653 
1654    Level: intermediate
1655 
1656 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1657 @*/
1658 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1659 {
1660   PetscFunctionBegin;
1661   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1662   PetscValidLogicalCollectiveInt(A, mb, 2);
1663   PetscValidLogicalCollectiveInt(A, nb, 3);
1664   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1669 {
1670   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1671 
1672   PetscFunctionBegin;
1673   if (mb) *mb = a->mb;
1674   if (nb) *nb = a->nb;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 /*@
1679    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1680    the `MATSCALAPACK` matrix
1681 
1682    Not collective
1683 
1684    Input Parameter:
1685 .  A  - a `MATSCALAPACK` matrix
1686 
1687    Output Parameters:
1688 +  mb - the row block size
1689 -  nb - the column block size
1690 
1691    Level: intermediate
1692 
1693 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1694 @*/
1695 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1696 {
1697   PetscFunctionBegin;
1698   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1699   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1704 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1705 
1706 /*MC
1707    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1708 
1709    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1710 
1711    Options Database Keys:
1712 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
1713 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1714 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1715 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1716 
1717   Note:
1718    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1719    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1720    the given rank.
1721 
1722    Level: beginner
1723 
1724 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1725 M*/
1726 
1727 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1728 {
1729   Mat_ScaLAPACK      *a;
1730   PetscBool           flg, flg1;
1731   Mat_ScaLAPACK_Grid *grid;
1732   MPI_Comm            icomm;
1733   PetscBLASInt        nprow, npcol, myrow, mycol;
1734   PetscInt            optv1, k = 2, array[2] = {0, 0};
1735   PetscMPIInt         size;
1736 
1737   PetscFunctionBegin;
1738   PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
1739   A->insertmode = NOT_SET_VALUES;
1740 
1741   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1742   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1743   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1744   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1745   A->stash.ScatterDestroy = NULL;
1746 
1747   PetscCall(PetscNew(&a));
1748   A->data = (void *)a;
1749 
1750   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1751   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1752     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1753     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1754     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1755   }
1756   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1757   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1758   if (!flg) {
1759     PetscCall(PetscNew(&grid));
1760 
1761     PetscCallMPI(MPI_Comm_size(icomm, &size));
1762     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1763 
1764     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1765     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1766     if (flg1) {
1767       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1768       grid->nprow = optv1;
1769     }
1770     PetscOptionsEnd();
1771 
1772     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1773     grid->npcol = size / grid->nprow;
1774     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1775     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1776     grid->ictxt = Csys2blacs_handle(icomm);
1777     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1778     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1779     grid->grid_refct = 1;
1780     grid->nprow      = nprow;
1781     grid->npcol      = npcol;
1782     grid->myrow      = myrow;
1783     grid->mycol      = mycol;
1784     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1785     grid->ictxrow = Csys2blacs_handle(icomm);
1786     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1787     grid->ictxcol = Csys2blacs_handle(icomm);
1788     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1789     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1790 
1791   } else grid->grid_refct++;
1792   PetscCall(PetscCommDestroy(&icomm));
1793   a->grid = grid;
1794   a->mb   = DEFAULT_BLOCKSIZE;
1795   a->nb   = DEFAULT_BLOCKSIZE;
1796 
1797   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1798   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1799   if (flg) {
1800     a->mb = array[0];
1801     a->nb = (k > 1) ? array[1] : a->mb;
1802   }
1803   PetscOptionsEnd();
1804 
1805   a->roworiented = PETSC_TRUE;
1806   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1807   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1808   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1809   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 /*@C
1814    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1815    (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1816 
1817    Collective
1818 
1819    Input Parameters:
1820 +  comm - MPI communicator
1821 .  mb   - row block size (or `PETSC_DECIDE` to have it set)
1822 .  nb   - column block size (or `PETSC_DECIDE` to have it set)
1823 .  M    - number of global rows
1824 .  N    - number of global columns
1825 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1826 -  csrc - coordinate of process that owns the first column of the distributed matrix
1827 
1828    Output Parameter:
1829 .  A - the matrix
1830 
1831    Options Database Key:
1832 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1833 
1834    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1835    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1836    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1837 
1838    Note:
1839    If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
1840    is chosen.
1841 
1842    Storage Information:
1843    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1844    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1845    significance and are thus ignored. The block sizes refer to the values
1846    used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1847 
1848    Level: intermediate
1849 
1850 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1851 @*/
1852 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1853 {
1854   Mat_ScaLAPACK *a;
1855   PetscInt       m, n;
1856 
1857   PetscFunctionBegin;
1858   PetscCall(MatCreate(comm, A));
1859   PetscCall(MatSetType(*A, MATSCALAPACK));
1860   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1861   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1862   m = PETSC_DECIDE;
1863   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1864   n = PETSC_DECIDE;
1865   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1866   PetscCall(MatSetSizes(*A, m, n, M, N));
1867   a = (Mat_ScaLAPACK *)(*A)->data;
1868   PetscCall(PetscBLASIntCast(M, &a->M));
1869   PetscCall(PetscBLASIntCast(N, &a->N));
1870   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1871   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1872   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1873   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1874   PetscCall(MatSetUp(*A));
1875   PetscFunctionReturn(0);
1876 }
1877