xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 0ee64c2acf658cf46a5cdae9f9a6779cc8288de1)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     xlld = PetscMax(1, A->rmap->n);
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     xlld = PetscMax(1, lszx);
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     ylld = PetscMax(1, A->rmap->n);
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     ylld = PetscMax(1, lszy);
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     dlld = PetscMax(1, A->rmap->n);
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     dlld = PetscMax(1, lszd);
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     dlld = PetscMax(1, A->rmap->n);
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     dlld = PetscMax(1, lszd);
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578 {
579   PetscFunctionBegin;
580   *missing = PETSC_FALSE;
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585 {
586   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587   PetscBLASInt   n, one = 1;
588 
589   PetscFunctionBegin;
590   n = x->lld * x->locc;
591   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596 {
597   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598   PetscBLASInt   i, n;
599   PetscScalar    v;
600 
601   PetscFunctionBegin;
602   n = PetscMin(x->M, x->N);
603   for (i = 1; i <= n; i++) {
604     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605     v += alpha;
606     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607   }
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612 {
613   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615   PetscBLASInt   one  = 1;
616   PetscScalar    beta = 1.0;
617 
618   PetscFunctionBegin;
619   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626 {
627   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
629 
630   PetscFunctionBegin;
631   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637 {
638   Mat            Bs;
639   MPI_Comm       comm;
640   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
641 
642   PetscFunctionBegin;
643   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644   PetscCall(MatCreate(comm, &Bs));
645   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646   PetscCall(MatSetType(Bs, MATSCALAPACK));
647   b       = (Mat_ScaLAPACK *)Bs->data;
648   b->M    = a->M;
649   b->N    = a->N;
650   b->mb   = a->mb;
651   b->nb   = a->nb;
652   b->rsrc = a->rsrc;
653   b->csrc = a->csrc;
654   PetscCall(MatSetUp(Bs));
655   *B = Bs;
656   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657   Bs->assembled = PETSC_TRUE;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662 {
663   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664   Mat            Bs   = *B;
665   PetscBLASInt   one  = 1;
666   PetscScalar    sone = 1.0, zero = 0.0;
667 #if defined(PETSC_USE_COMPLEX)
668   PetscInt i, j;
669 #endif
670 
671   PetscFunctionBegin;
672   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675   *B = Bs;
676   b  = (Mat_ScaLAPACK *)Bs->data;
677   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678 #if defined(PETSC_USE_COMPLEX)
679   /* undo conjugation */
680   for (i = 0; i < b->locr; i++)
681     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682 #endif
683   Bs->assembled = PETSC_TRUE;
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688 {
689   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690   PetscInt       i, j;
691 
692   PetscFunctionBegin;
693   for (i = 0; i < a->locr; i++)
694     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699 {
700   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701   Mat            Bs   = *B;
702   PetscBLASInt   one  = 1;
703   PetscScalar    sone = 1.0, zero = 0.0;
704 
705   PetscFunctionBegin;
706   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708   *B = Bs;
709   b  = (Mat_ScaLAPACK *)Bs->data;
710   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711   Bs->assembled = PETSC_TRUE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716 {
717   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718   PetscScalar    *x, *x2d;
719   const PetscInt *ranges;
720   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
721 
722   PetscFunctionBegin;
723   PetscCall(VecCopy(B, X));
724   PetscCall(VecGetArray(X, &x));
725 
726   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729   xlld = PetscMax(1, A->rmap->n);
730   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731   PetscCheckScaLapackInfo("descinit", info);
732 
733   /* allocate 2d vector */
734   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735   PetscCall(PetscMalloc1(lszx, &x2d));
736   xlld = PetscMax(1, lszx);
737 
738   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740   PetscCheckScaLapackInfo("descinit", info);
741 
742   /* redistribute x as a column of a 2d matrix */
743   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
744 
745   /* call ScaLAPACK subroutine */
746   switch (A->factortype) {
747   case MAT_FACTOR_LU:
748     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749     PetscCheckScaLapackInfo("getrs", info);
750     break;
751   case MAT_FACTOR_CHOLESKY:
752     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753     PetscCheckScaLapackInfo("potrs", info);
754     break;
755   default:
756     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757   }
758 
759   /* redistribute x from a column of a 2d matrix */
760   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
761 
762   PetscCall(PetscFree(x2d));
763   PetscCall(VecRestoreArray(X, &x));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771   PetscCall(VecAXPY(X, 1, Y));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776 {
777   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
778   PetscBool      flg1, flg2;
779   PetscBLASInt   one = 1, info;
780 
781   PetscFunctionBegin;
782   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
783   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
784   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
785   MatScaLAPACKCheckDistribution(B, 1, X, 2);
786   b = (Mat_ScaLAPACK *)B->data;
787   x = (Mat_ScaLAPACK *)X->data;
788   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
789 
790   switch (A->factortype) {
791   case MAT_FACTOR_LU:
792     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
793     PetscCheckScaLapackInfo("getrs", info);
794     break;
795   case MAT_FACTOR_CHOLESKY:
796     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
797     PetscCheckScaLapackInfo("potrs", info);
798     break;
799   default:
800     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
801   }
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
805 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
806 {
807   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
808   PetscBLASInt   one = 1, info;
809 
810   PetscFunctionBegin;
811   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
812   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
813   PetscCheckScaLapackInfo("getrf", info);
814   A->factortype = MAT_FACTOR_LU;
815   A->assembled  = PETSC_TRUE;
816 
817   PetscCall(PetscFree(A->solvertype));
818   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
822 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
823 {
824   PetscFunctionBegin;
825   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
826   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
827   PetscFunctionReturn(PETSC_SUCCESS);
828 }
829 
830 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
831 {
832   PetscFunctionBegin;
833   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
834   PetscFunctionReturn(PETSC_SUCCESS);
835 }
836 
837 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
838 {
839   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
840   PetscBLASInt   one = 1, info;
841 
842   PetscFunctionBegin;
843   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
844   PetscCheckScaLapackInfo("potrf", info);
845   A->factortype = MAT_FACTOR_CHOLESKY;
846   A->assembled  = PETSC_TRUE;
847 
848   PetscCall(PetscFree(A->solvertype));
849   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
854 {
855   PetscFunctionBegin;
856   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
857   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
862 {
863   PetscFunctionBegin;
864   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
869 {
870   PetscFunctionBegin;
871   *type = MATSOLVERSCALAPACK;
872   PetscFunctionReturn(PETSC_SUCCESS);
873 }
874 
875 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
876 {
877   Mat            B;
878   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
879 
880   PetscFunctionBegin;
881   /* Create the factorization matrix */
882   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
883   B->trivialsymbolic = PETSC_TRUE;
884   B->factortype      = ftype;
885   PetscCall(PetscFree(B->solvertype));
886   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
887 
888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
889   *F = B;
890   PetscFunctionReturn(PETSC_SUCCESS);
891 }
892 
893 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
894 {
895   PetscFunctionBegin;
896   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
897   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
902 {
903   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
904   PetscBLASInt   one = 1, lwork = 0;
905   const char    *ntype;
906   PetscScalar   *work = NULL, dummy;
907 
908   PetscFunctionBegin;
909   switch (type) {
910   case NORM_1:
911     ntype = "1";
912     lwork = PetscMax(a->locr, a->locc);
913     break;
914   case NORM_FROBENIUS:
915     ntype = "F";
916     work  = &dummy;
917     break;
918   case NORM_INFINITY:
919     ntype = "I";
920     lwork = PetscMax(a->locr, a->locc);
921     break;
922   default:
923     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
924   }
925   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
926   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
927   if (lwork) PetscCall(PetscFree(work));
928   PetscFunctionReturn(PETSC_SUCCESS);
929 }
930 
931 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
932 {
933   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
934 
935   PetscFunctionBegin;
936   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
937   PetscFunctionReturn(PETSC_SUCCESS);
938 }
939 
940 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
941 {
942   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
943   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
944 
945   PetscFunctionBegin;
946   if (rows) {
947     n     = a->locr;
948     nb    = a->mb;
949     isrc  = a->rsrc;
950     nproc = a->grid->nprow;
951     iproc = a->grid->myrow;
952     PetscCall(PetscMalloc1(n, &idx));
953     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
954     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
955   }
956   if (cols) {
957     n     = a->locc;
958     nb    = a->nb;
959     isrc  = a->csrc;
960     nproc = a->grid->npcol;
961     iproc = a->grid->mycol;
962     PetscCall(PetscMalloc1(n, &idx));
963     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
964     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
965   }
966   PetscFunctionReturn(PETSC_SUCCESS);
967 }
968 
969 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
970 {
971   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
972   Mat                Bmpi;
973   MPI_Comm           comm;
974   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
975   const PetscInt    *ranges, *branges, *cwork;
976   const PetscScalar *vwork;
977   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
978   PetscScalar       *barray;
979   PetscBool          differ = PETSC_FALSE;
980   PetscMPIInt        size;
981 
982   PetscFunctionBegin;
983   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
984   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
985 
986   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
987     PetscCallMPI(MPI_Comm_size(comm, &size));
988     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
989     for (i = 0; i < size; i++)
990       if (ranges[i + 1] != branges[i + 1]) {
991         differ = PETSC_TRUE;
992         break;
993       }
994   }
995 
996   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
997     PetscCall(MatCreate(comm, &Bmpi));
998     m = PETSC_DECIDE;
999     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1000     n = PETSC_DECIDE;
1001     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1002     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1003     PetscCall(MatSetType(Bmpi, MATDENSE));
1004     PetscCall(MatSetUp(Bmpi));
1005 
1006     /* create ScaLAPACK descriptor for B (1d block distribution) */
1007     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1008     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1009     lld = PetscMax(ldb, 1); /* local leading dimension */
1010     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1011     PetscCheckScaLapackInfo("descinit", info);
1012 
1013     /* redistribute matrix */
1014     PetscCall(MatDenseGetArray(Bmpi, &barray));
1015     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1016     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1017     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1018     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1019 
1020     /* transfer rows of auxiliary matrix to the final matrix B */
1021     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1022     for (i = rstart; i < rend; i++) {
1023       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1024       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1025       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1026     }
1027     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1028     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatDestroy(&Bmpi));
1030 
1031   } else { /* normal cases */
1032 
1033     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1034     else {
1035       PetscCall(MatCreate(comm, &Bmpi));
1036       m = PETSC_DECIDE;
1037       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1038       n = PETSC_DECIDE;
1039       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1040       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1041       PetscCall(MatSetType(Bmpi, MATDENSE));
1042       PetscCall(MatSetUp(Bmpi));
1043     }
1044 
1045     /* create ScaLAPACK descriptor for B (1d block distribution) */
1046     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1047     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1048     lld = PetscMax(ldb, 1); /* local leading dimension */
1049     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1050     PetscCheckScaLapackInfo("descinit", info);
1051 
1052     /* redistribute matrix */
1053     PetscCall(MatDenseGetArray(Bmpi, &barray));
1054     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1055     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1056 
1057     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1058     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1059     if (reuse == MAT_INPLACE_MATRIX) {
1060       PetscCall(MatHeaderReplace(A, &Bmpi));
1061     } else *B = Bmpi;
1062   }
1063   PetscFunctionReturn(PETSC_SUCCESS);
1064 }
1065 
1066 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1067 {
1068   const PetscInt *ranges;
1069   PetscMPIInt     size;
1070   PetscInt        i, n;
1071 
1072   PetscFunctionBegin;
1073   *correct = PETSC_TRUE;
1074   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1075   if (size > 1) {
1076     PetscCall(PetscLayoutGetRanges(map, &ranges));
1077     n = ranges[1] - ranges[0];
1078     for (i = 1; i < size; i++)
1079       if (ranges[i + 1] - ranges[i] != n) break;
1080     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1081   }
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1086 {
1087   Mat_ScaLAPACK     *b;
1088   Mat                Bmpi;
1089   MPI_Comm           comm;
1090   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1091   const PetscInt    *ranges, *rows, *cols;
1092   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1093   const PetscScalar *aarray;
1094   IS                 ir, ic;
1095   PetscInt           lda;
1096   PetscBool          flg;
1097 
1098   PetscFunctionBegin;
1099   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1100 
1101   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1102   else {
1103     PetscCall(MatCreate(comm, &Bmpi));
1104     m = PETSC_DECIDE;
1105     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1106     n = PETSC_DECIDE;
1107     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1108     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1109     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1110     PetscCall(MatSetUp(Bmpi));
1111   }
1112   b = (Mat_ScaLAPACK *)Bmpi->data;
1113 
1114   PetscCall(MatDenseGetLDA(A, &lda));
1115   PetscCall(MatDenseGetArrayRead(A, &aarray));
1116   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1117   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1118   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1119     /* create ScaLAPACK descriptor for A (1d block distribution) */
1120     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1121     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1122     lld = PetscMax(lda, 1);                       /* local leading dimension */
1123     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1124     PetscCheckScaLapackInfo("descinit", info);
1125 
1126     /* redistribute matrix */
1127     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1128     Bmpi->nooffprocentries = PETSC_TRUE;
1129   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1130     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1131     b->roworiented = PETSC_FALSE;
1132     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1133     PetscCall(ISGetIndices(ir, &rows));
1134     PetscCall(ISGetIndices(ic, &cols));
1135     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1136     PetscCall(ISRestoreIndices(ir, &rows));
1137     PetscCall(ISRestoreIndices(ic, &cols));
1138     PetscCall(ISDestroy(&ic));
1139     PetscCall(ISDestroy(&ir));
1140   }
1141   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1142   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1143   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1144   if (reuse == MAT_INPLACE_MATRIX) {
1145     PetscCall(MatHeaderReplace(A, &Bmpi));
1146   } else *B = Bmpi;
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1151 {
1152   Mat                mat_scal;
1153   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1154   const PetscInt    *cols;
1155   const PetscScalar *vals;
1156 
1157   PetscFunctionBegin;
1158   if (reuse == MAT_REUSE_MATRIX) {
1159     mat_scal = *newmat;
1160     PetscCall(MatZeroEntries(mat_scal));
1161   } else {
1162     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1163     m = PETSC_DECIDE;
1164     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1165     n = PETSC_DECIDE;
1166     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1167     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1168     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1169     PetscCall(MatSetUp(mat_scal));
1170   }
1171   for (row = rstart; row < rend; row++) {
1172     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1173     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1174     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1175   }
1176   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1177   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1178 
1179   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1180   else *newmat = mat_scal;
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1185 {
1186   Mat                mat_scal;
1187   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1188   const PetscInt    *cols;
1189   const PetscScalar *vals;
1190   PetscScalar        v;
1191 
1192   PetscFunctionBegin;
1193   if (reuse == MAT_REUSE_MATRIX) {
1194     mat_scal = *newmat;
1195     PetscCall(MatZeroEntries(mat_scal));
1196   } else {
1197     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1198     m = PETSC_DECIDE;
1199     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1200     n = PETSC_DECIDE;
1201     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1202     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1203     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1204     PetscCall(MatSetUp(mat_scal));
1205   }
1206   PetscCall(MatGetRowUpperTriangular(A));
1207   for (row = rstart; row < rend; row++) {
1208     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1209     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1210     for (j = 0; j < ncols; j++) { /* lower triangular part */
1211       if (cols[j] == row) continue;
1212       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1213       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1214     }
1215     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1216   }
1217   PetscCall(MatRestoreRowUpperTriangular(A));
1218   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1219   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1220 
1221   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1222   else *newmat = mat_scal;
1223   PetscFunctionReturn(PETSC_SUCCESS);
1224 }
1225 
1226 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1227 {
1228   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1229   PetscInt       sz = 0;
1230 
1231   PetscFunctionBegin;
1232   PetscCall(PetscLayoutSetUp(A->rmap));
1233   PetscCall(PetscLayoutSetUp(A->cmap));
1234   if (!a->lld) a->lld = a->locr;
1235 
1236   PetscCall(PetscFree(a->loc));
1237   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1238   PetscCall(PetscCalloc1(sz, &a->loc));
1239 
1240   A->preallocated = PETSC_TRUE;
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1245 {
1246   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1247   Mat_ScaLAPACK_Grid *grid;
1248   PetscBool           flg;
1249   MPI_Comm            icomm;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(MatStashDestroy_Private(&A->stash));
1253   PetscCall(PetscFree(a->loc));
1254   PetscCall(PetscFree(a->pivots));
1255   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1256   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1257   if (--grid->grid_refct == 0) {
1258     Cblacs_gridexit(grid->ictxt);
1259     Cblacs_gridexit(grid->ictxrow);
1260     Cblacs_gridexit(grid->ictxcol);
1261     PetscCall(PetscFree(grid));
1262     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1263   }
1264   PetscCall(PetscCommDestroy(&icomm));
1265   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1266   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1267   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1268   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1269   PetscCall(PetscFree(A->data));
1270   PetscFunctionReturn(PETSC_SUCCESS);
1271 }
1272 
1273 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1274 {
1275   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1276   PetscBLASInt   info = 0;
1277   PetscBool      flg;
1278 
1279   PetscFunctionBegin;
1280   PetscCall(PetscLayoutSetUp(A->rmap));
1281   PetscCall(PetscLayoutSetUp(A->cmap));
1282 
1283   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1284   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1285   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1286   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1287   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1288 
1289   /* compute local sizes */
1290   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1291   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1292   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1293   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1294   a->lld  = PetscMax(1, a->locr);
1295 
1296   /* allocate local array */
1297   PetscCall(MatScaLAPACKSetPreallocation(A));
1298 
1299   /* set up ScaLAPACK descriptor */
1300   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1301   PetscCheckScaLapackInfo("descinit", info);
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1306 {
1307   PetscInt nstash, reallocs;
1308 
1309   PetscFunctionBegin;
1310   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1311   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1312   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1313   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1314   PetscFunctionReturn(PETSC_SUCCESS);
1315 }
1316 
1317 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1318 {
1319   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1320   PetscMPIInt    n;
1321   PetscInt       i, flg, *row, *col;
1322   PetscScalar   *val;
1323   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1324 
1325   PetscFunctionBegin;
1326   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1327   while (1) {
1328     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1329     if (!flg) break;
1330     for (i = 0; i < n; i++) {
1331       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1332       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1333       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1334       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1335       switch (A->insertmode) {
1336       case INSERT_VALUES:
1337         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1338         break;
1339       case ADD_VALUES:
1340         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1341         break;
1342       default:
1343         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1344       }
1345     }
1346   }
1347   PetscCall(MatStashScatterEnd_Private(&A->stash));
1348   PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350 
1351 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1352 {
1353   Mat      Adense, As;
1354   MPI_Comm comm;
1355 
1356   PetscFunctionBegin;
1357   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1358   PetscCall(MatCreate(comm, &Adense));
1359   PetscCall(MatSetType(Adense, MATDENSE));
1360   PetscCall(MatLoad(Adense, viewer));
1361   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1362   PetscCall(MatDestroy(&Adense));
1363   PetscCall(MatHeaderReplace(newMat, &As));
1364   PetscFunctionReturn(PETSC_SUCCESS);
1365 }
1366 
1367 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1368                                        0,
1369                                        0,
1370                                        MatMult_ScaLAPACK,
1371                                        /* 4*/ MatMultAdd_ScaLAPACK,
1372                                        MatMultTranspose_ScaLAPACK,
1373                                        MatMultTransposeAdd_ScaLAPACK,
1374                                        MatSolve_ScaLAPACK,
1375                                        MatSolveAdd_ScaLAPACK,
1376                                        0,
1377                                        /*10*/ 0,
1378                                        MatLUFactor_ScaLAPACK,
1379                                        MatCholeskyFactor_ScaLAPACK,
1380                                        0,
1381                                        MatTranspose_ScaLAPACK,
1382                                        /*15*/ MatGetInfo_ScaLAPACK,
1383                                        0,
1384                                        MatGetDiagonal_ScaLAPACK,
1385                                        MatDiagonalScale_ScaLAPACK,
1386                                        MatNorm_ScaLAPACK,
1387                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1388                                        MatAssemblyEnd_ScaLAPACK,
1389                                        MatSetOption_ScaLAPACK,
1390                                        MatZeroEntries_ScaLAPACK,
1391                                        /*24*/ 0,
1392                                        MatLUFactorSymbolic_ScaLAPACK,
1393                                        MatLUFactorNumeric_ScaLAPACK,
1394                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1395                                        MatCholeskyFactorNumeric_ScaLAPACK,
1396                                        /*29*/ MatSetUp_ScaLAPACK,
1397                                        0,
1398                                        0,
1399                                        0,
1400                                        0,
1401                                        /*34*/ MatDuplicate_ScaLAPACK,
1402                                        0,
1403                                        0,
1404                                        0,
1405                                        0,
1406                                        /*39*/ MatAXPY_ScaLAPACK,
1407                                        0,
1408                                        0,
1409                                        0,
1410                                        MatCopy_ScaLAPACK,
1411                                        /*44*/ 0,
1412                                        MatScale_ScaLAPACK,
1413                                        MatShift_ScaLAPACK,
1414                                        0,
1415                                        0,
1416                                        /*49*/ 0,
1417                                        0,
1418                                        0,
1419                                        0,
1420                                        0,
1421                                        /*54*/ 0,
1422                                        0,
1423                                        0,
1424                                        0,
1425                                        0,
1426                                        /*59*/ 0,
1427                                        MatDestroy_ScaLAPACK,
1428                                        MatView_ScaLAPACK,
1429                                        0,
1430                                        0,
1431                                        /*64*/ 0,
1432                                        0,
1433                                        0,
1434                                        0,
1435                                        0,
1436                                        /*69*/ 0,
1437                                        0,
1438                                        MatConvert_ScaLAPACK_Dense,
1439                                        0,
1440                                        0,
1441                                        /*74*/ 0,
1442                                        0,
1443                                        0,
1444                                        0,
1445                                        0,
1446                                        /*79*/ 0,
1447                                        0,
1448                                        0,
1449                                        0,
1450                                        MatLoad_ScaLAPACK,
1451                                        /*84*/ 0,
1452                                        0,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        /*89*/ 0,
1457                                        0,
1458                                        MatMatMultNumeric_ScaLAPACK,
1459                                        0,
1460                                        0,
1461                                        /*94*/ 0,
1462                                        0,
1463                                        0,
1464                                        MatMatTransposeMultNumeric_ScaLAPACK,
1465                                        0,
1466                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1467                                        0,
1468                                        0,
1469                                        MatConjugate_ScaLAPACK,
1470                                        0,
1471                                        /*104*/ 0,
1472                                        0,
1473                                        0,
1474                                        0,
1475                                        0,
1476                                        /*109*/ MatMatSolve_ScaLAPACK,
1477                                        0,
1478                                        0,
1479                                        0,
1480                                        MatMissingDiagonal_ScaLAPACK,
1481                                        /*114*/ 0,
1482                                        0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                        /*119*/ 0,
1487                                        MatHermitianTranspose_ScaLAPACK,
1488                                        MatMultHermitianTranspose_ScaLAPACK,
1489                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1490                                        0,
1491                                        /*124*/ 0,
1492                                        0,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                        /*129*/ 0,
1497                                        0,
1498                                        0,
1499                                        MatTransposeMatMultNumeric_ScaLAPACK,
1500                                        0,
1501                                        /*134*/ 0,
1502                                        0,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                        /*140*/ 0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                        /*145*/ 0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                        /*150*/ 0,
1518                                        0};
1519 
1520 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1521 {
1522   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1523   PetscInt           size = stash->size, nsends;
1524   PetscInt           count, *sindices, **rindices, i, j, l;
1525   PetscScalar      **rvalues, *svalues;
1526   MPI_Comm           comm = stash->comm;
1527   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1528   PetscMPIInt       *sizes, *nlengths, nreceives;
1529   PetscInt          *sp_idx, *sp_idy;
1530   PetscScalar       *sp_val;
1531   PetscMatStashSpace space, space_next;
1532   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1533   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1534 
1535   PetscFunctionBegin;
1536   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1537     InsertMode addv;
1538     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1539     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1540     mat->insertmode = addv; /* in case this processor had no cache */
1541   }
1542 
1543   bs2 = stash->bs * stash->bs;
1544 
1545   /*  first count number of contributors to each processor */
1546   PetscCall(PetscCalloc1(size, &nlengths));
1547   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1548 
1549   i = j = 0;
1550   space = stash->space_head;
1551   while (space) {
1552     space_next = space->next;
1553     for (l = 0; l < space->local_used; l++) {
1554       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1555       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1556       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1557       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1558       nlengths[j]++;
1559       owner[i] = j;
1560       i++;
1561     }
1562     space = space_next;
1563   }
1564 
1565   /* Now check what procs get messages - and compute nsends. */
1566   PetscCall(PetscCalloc1(size, &sizes));
1567   for (i = 0, nsends = 0; i < size; i++) {
1568     if (nlengths[i]) {
1569       sizes[i] = 1;
1570       nsends++;
1571     }
1572   }
1573 
1574   {
1575     PetscMPIInt *onodes, *olengths;
1576     /* Determine the number of messages to expect, their lengths, from from-ids */
1577     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1578     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1579     /* since clubbing row,col - lengths are multiplied by 2 */
1580     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1581     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1582     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1583     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1584     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1585     PetscCall(PetscFree(onodes));
1586     PetscCall(PetscFree(olengths));
1587   }
1588 
1589   /* do sends:
1590       1) starts[i] gives the starting index in svalues for stuff going to
1591          the ith processor
1592   */
1593   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1594   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1595   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1596   /* use 2 sends the first with all_a, the next with all_i and all_j */
1597   startv[0] = 0;
1598   starti[0] = 0;
1599   for (i = 1; i < size; i++) {
1600     startv[i] = startv[i - 1] + nlengths[i - 1];
1601     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1602   }
1603 
1604   i     = 0;
1605   space = stash->space_head;
1606   while (space) {
1607     space_next = space->next;
1608     sp_idx     = space->idx;
1609     sp_idy     = space->idy;
1610     sp_val     = space->val;
1611     for (l = 0; l < space->local_used; l++) {
1612       j = owner[i];
1613       if (bs2 == 1) {
1614         svalues[startv[j]] = sp_val[l];
1615       } else {
1616         PetscInt     k;
1617         PetscScalar *buf1, *buf2;
1618         buf1 = svalues + bs2 * startv[j];
1619         buf2 = space->val + bs2 * l;
1620         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1621       }
1622       sindices[starti[j]]               = sp_idx[l];
1623       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1624       startv[j]++;
1625       starti[j]++;
1626       i++;
1627     }
1628     space = space_next;
1629   }
1630   startv[0] = 0;
1631   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1632 
1633   for (i = 0, count = 0; i < size; i++) {
1634     if (sizes[i]) {
1635       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1636       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1637     }
1638   }
1639 #if defined(PETSC_USE_INFO)
1640   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1641   for (i = 0; i < size; i++) {
1642     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1643   }
1644 #endif
1645   PetscCall(PetscFree(nlengths));
1646   PetscCall(PetscFree(owner));
1647   PetscCall(PetscFree2(startv, starti));
1648   PetscCall(PetscFree(sizes));
1649 
1650   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1651   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1652 
1653   for (i = 0; i < nreceives; i++) {
1654     recv_waits[2 * i]     = recv_waits1[i];
1655     recv_waits[2 * i + 1] = recv_waits2[i];
1656   }
1657   stash->recv_waits = recv_waits;
1658 
1659   PetscCall(PetscFree(recv_waits1));
1660   PetscCall(PetscFree(recv_waits2));
1661 
1662   stash->svalues         = svalues;
1663   stash->sindices        = sindices;
1664   stash->rvalues         = rvalues;
1665   stash->rindices        = rindices;
1666   stash->send_waits      = send_waits;
1667   stash->nsends          = nsends;
1668   stash->nrecvs          = nreceives;
1669   stash->reproduce_count = 0;
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1674 {
1675   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1676 
1677   PetscFunctionBegin;
1678   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1679   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1680   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1681   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1682   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 /*@
1687   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1688   the `MATSCALAPACK` matrix
1689 
1690   Logically Collective
1691 
1692   Input Parameters:
1693 + A  - a `MATSCALAPACK` matrix
1694 . mb - the row block size
1695 - nb - the column block size
1696 
1697   Level: intermediate
1698 
1699   Note:
1700   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1701 
1702 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1703 @*/
1704 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1705 {
1706   PetscFunctionBegin;
1707   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1708   PetscValidLogicalCollectiveInt(A, mb, 2);
1709   PetscValidLogicalCollectiveInt(A, nb, 3);
1710   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1711   PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713 
1714 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1715 {
1716   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1717 
1718   PetscFunctionBegin;
1719   if (mb) *mb = a->mb;
1720   if (nb) *nb = a->nb;
1721   PetscFunctionReturn(PETSC_SUCCESS);
1722 }
1723 
1724 /*@
1725   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1726   the `MATSCALAPACK` matrix
1727 
1728   Not Collective
1729 
1730   Input Parameter:
1731 . A - a `MATSCALAPACK` matrix
1732 
1733   Output Parameters:
1734 + mb - the row block size
1735 - nb - the column block size
1736 
1737   Level: intermediate
1738 
1739   Note:
1740   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1741 
1742 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1743 @*/
1744 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1745 {
1746   PetscFunctionBegin;
1747   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1748   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1749   PetscFunctionReturn(PETSC_SUCCESS);
1750 }
1751 
1752 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1753 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1754 
1755 /*MC
1756    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1757 
1758    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1759 
1760    Options Database Keys:
1761 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1762 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1763 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1764 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1765 
1766    Level: intermediate
1767 
1768   Note:
1769    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1770    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1771    the given rank.
1772 
1773 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1774 M*/
1775 
1776 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1777 {
1778   Mat_ScaLAPACK      *a;
1779   PetscBool           flg, flg1;
1780   Mat_ScaLAPACK_Grid *grid;
1781   MPI_Comm            icomm;
1782   PetscBLASInt        nprow, npcol, myrow, mycol;
1783   PetscInt            optv1, k = 2, array[2] = {0, 0};
1784   PetscMPIInt         size;
1785 
1786   PetscFunctionBegin;
1787   A->ops[0]     = MatOps_Values;
1788   A->insertmode = NOT_SET_VALUES;
1789 
1790   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1791   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1792   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1793   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1794   A->stash.ScatterDestroy = NULL;
1795 
1796   PetscCall(PetscNew(&a));
1797   A->data = (void *)a;
1798 
1799   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1800   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1801     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1802     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1803     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1804   }
1805   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1806   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1807   if (!flg) {
1808     PetscCall(PetscNew(&grid));
1809 
1810     PetscCallMPI(MPI_Comm_size(icomm, &size));
1811     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1812 
1813     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1814     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1815     if (flg1) {
1816       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1817       grid->nprow = optv1;
1818     }
1819     PetscOptionsEnd();
1820 
1821     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1822     grid->npcol = size / grid->nprow;
1823     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1824     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1825     grid->ictxt = Csys2blacs_handle(icomm);
1826     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1827     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1828     grid->grid_refct = 1;
1829     grid->nprow      = nprow;
1830     grid->npcol      = npcol;
1831     grid->myrow      = myrow;
1832     grid->mycol      = mycol;
1833     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1834     grid->ictxrow = Csys2blacs_handle(icomm);
1835     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1836     grid->ictxcol = Csys2blacs_handle(icomm);
1837     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1838     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1839 
1840   } else grid->grid_refct++;
1841   PetscCall(PetscCommDestroy(&icomm));
1842   a->grid = grid;
1843   a->mb   = DEFAULT_BLOCKSIZE;
1844   a->nb   = DEFAULT_BLOCKSIZE;
1845 
1846   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1847   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1848   if (flg) {
1849     a->mb = array[0];
1850     a->nb = (k > 1) ? array[1] : a->mb;
1851   }
1852   PetscOptionsEnd();
1853 
1854   a->roworiented = PETSC_TRUE;
1855   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1856   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1857   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1858   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1859   PetscFunctionReturn(PETSC_SUCCESS);
1860 }
1861 
1862 /*@C
1863   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1864   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1865 
1866   Collective
1867 
1868   Input Parameters:
1869 + comm - MPI communicator
1870 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1871 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1872 . M    - number of global rows
1873 . N    - number of global columns
1874 . rsrc - coordinate of process that owns the first row of the distributed matrix
1875 - csrc - coordinate of process that owns the first column of the distributed matrix
1876 
1877   Output Parameter:
1878 . A - the matrix
1879 
1880   Options Database Key:
1881 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1882 
1883   Level: intermediate
1884 
1885   Notes:
1886   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1887 
1888   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1889   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1890   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1891 
1892   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1893   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1894   significance and are thus ignored. The block sizes refer to the values
1895   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1896 
1897 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1898 @*/
1899 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1900 {
1901   Mat_ScaLAPACK *a;
1902   PetscInt       m, n;
1903 
1904   PetscFunctionBegin;
1905   PetscCall(MatCreate(comm, A));
1906   PetscCall(MatSetType(*A, MATSCALAPACK));
1907   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1908   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1909   m = PETSC_DECIDE;
1910   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1911   n = PETSC_DECIDE;
1912   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1913   PetscCall(MatSetSizes(*A, m, n, M, N));
1914   a = (Mat_ScaLAPACK *)(*A)->data;
1915   PetscCall(PetscBLASIntCast(M, &a->M));
1916   PetscCall(PetscBLASIntCast(N, &a->N));
1917   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1918   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1919   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1920   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1921   PetscCall(MatSetUp(*A));
1922   PetscFunctionReturn(PETSC_SUCCESS);
1923 }
1924