xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision a29dfd43bb0c77e2653d3bfa2c953f902720a6d2)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     xlld = PetscMax(1, A->rmap->n);
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     xlld = PetscMax(1, lszx);
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     ylld = PetscMax(1, A->rmap->n);
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     ylld = PetscMax(1, lszy);
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     dlld = PetscMax(1, A->rmap->n);
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     dlld = PetscMax(1, lszd);
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     dlld = PetscMax(1, A->rmap->n);
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     dlld = PetscMax(1, lszd);
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578 {
579   PetscFunctionBegin;
580   *missing = PETSC_FALSE;
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585 {
586   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587   PetscBLASInt   n, one = 1;
588 
589   PetscFunctionBegin;
590   n = x->lld * x->locc;
591   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596 {
597   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598   PetscBLASInt   i, n;
599   PetscScalar    v;
600 
601   PetscFunctionBegin;
602   n = PetscMin(x->M, x->N);
603   for (i = 1; i <= n; i++) {
604     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605     v += alpha;
606     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607   }
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612 {
613   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615   PetscBLASInt   one  = 1;
616   PetscScalar    beta = 1.0;
617 
618   PetscFunctionBegin;
619   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626 {
627   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
629 
630   PetscFunctionBegin;
631   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637 {
638   Mat            Bs;
639   MPI_Comm       comm;
640   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
641 
642   PetscFunctionBegin;
643   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644   PetscCall(MatCreate(comm, &Bs));
645   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646   PetscCall(MatSetType(Bs, MATSCALAPACK));
647   b       = (Mat_ScaLAPACK *)Bs->data;
648   b->M    = a->M;
649   b->N    = a->N;
650   b->mb   = a->mb;
651   b->nb   = a->nb;
652   b->rsrc = a->rsrc;
653   b->csrc = a->csrc;
654   PetscCall(MatSetUp(Bs));
655   *B = Bs;
656   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657   Bs->assembled = PETSC_TRUE;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662 {
663   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664   Mat            Bs   = *B;
665   PetscBLASInt   one  = 1;
666   PetscScalar    sone = 1.0, zero = 0.0;
667 #if defined(PETSC_USE_COMPLEX)
668   PetscInt i, j;
669 #endif
670 
671   PetscFunctionBegin;
672   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675   *B = Bs;
676   b  = (Mat_ScaLAPACK *)Bs->data;
677   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678 #if defined(PETSC_USE_COMPLEX)
679   /* undo conjugation */
680   for (i = 0; i < b->locr; i++)
681     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682 #endif
683   Bs->assembled = PETSC_TRUE;
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688 {
689   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690   PetscInt       i, j;
691 
692   PetscFunctionBegin;
693   for (i = 0; i < a->locr; i++)
694     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699 {
700   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701   Mat            Bs   = *B;
702   PetscBLASInt   one  = 1;
703   PetscScalar    sone = 1.0, zero = 0.0;
704 
705   PetscFunctionBegin;
706   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708   *B = Bs;
709   b  = (Mat_ScaLAPACK *)Bs->data;
710   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711   Bs->assembled = PETSC_TRUE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716 {
717   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718   PetscScalar    *x, *x2d;
719   const PetscInt *ranges;
720   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
721 
722   PetscFunctionBegin;
723   PetscCall(VecCopy(B, X));
724   PetscCall(VecGetArray(X, &x));
725 
726   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729   xlld = PetscMax(1, A->rmap->n);
730   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731   PetscCheckScaLapackInfo("descinit", info);
732 
733   /* allocate 2d vector */
734   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735   PetscCall(PetscMalloc1(lszx, &x2d));
736   xlld = PetscMax(1, lszx);
737 
738   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740   PetscCheckScaLapackInfo("descinit", info);
741 
742   /* redistribute x as a column of a 2d matrix */
743   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
744 
745   /* call ScaLAPACK subroutine */
746   switch (A->factortype) {
747   case MAT_FACTOR_LU:
748     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749     PetscCheckScaLapackInfo("getrs", info);
750     break;
751   case MAT_FACTOR_CHOLESKY:
752     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753     PetscCheckScaLapackInfo("potrs", info);
754     break;
755   default:
756     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757   }
758 
759   /* redistribute x from a column of a 2d matrix */
760   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
761 
762   PetscCall(PetscFree(x2d));
763   PetscCall(VecRestoreArray(X, &x));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771   PetscCall(VecAXPY(X, 1, Y));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776 {
777   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
778   PetscBool      flg1, flg2;
779   PetscBLASInt   one = 1, info;
780   Mat            C;
781   MatType        type;
782 
783   PetscFunctionBegin;
784   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
785   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
786   if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
787   if (flg2) {
788     if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
789     else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
790     C = X;
791   } else {
792     PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
793   }
794   x = (Mat_ScaLAPACK *)C->data;
795 
796   switch (A->factortype) {
797   case MAT_FACTOR_LU:
798     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
799     PetscCheckScaLapackInfo("getrs", info);
800     break;
801   case MAT_FACTOR_CHOLESKY:
802     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
803     PetscCheckScaLapackInfo("potrs", info);
804     break;
805   default:
806     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
807   }
808   if (!flg2) {
809     PetscCall(MatGetType(X, &type));
810     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
811     PetscCall(MatDestroy(&C));
812   }
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
816 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
817 {
818   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
819   PetscBLASInt   one = 1, info;
820 
821   PetscFunctionBegin;
822   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
823   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
824   PetscCheckScaLapackInfo("getrf", info);
825   A->factortype = MAT_FACTOR_LU;
826   A->assembled  = PETSC_TRUE;
827 
828   PetscCall(PetscFree(A->solvertype));
829   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
834 {
835   PetscFunctionBegin;
836   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
837   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
842 {
843   PetscFunctionBegin;
844   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
849 {
850   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
851   PetscBLASInt   one = 1, info;
852 
853   PetscFunctionBegin;
854   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
855   PetscCheckScaLapackInfo("potrf", info);
856   A->factortype = MAT_FACTOR_CHOLESKY;
857   A->assembled  = PETSC_TRUE;
858 
859   PetscCall(PetscFree(A->solvertype));
860   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
861   PetscFunctionReturn(PETSC_SUCCESS);
862 }
863 
864 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
865 {
866   PetscFunctionBegin;
867   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
868   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
873 {
874   PetscFunctionBegin;
875   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
880 {
881   PetscFunctionBegin;
882   *type = MATSOLVERSCALAPACK;
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
886 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
887 {
888   Mat            B;
889   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
890 
891   PetscFunctionBegin;
892   /* Create the factorization matrix */
893   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
894   B->trivialsymbolic = PETSC_TRUE;
895   B->factortype      = ftype;
896   PetscCall(PetscFree(B->solvertype));
897   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
898 
899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
900   *F = B;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
905 {
906   PetscFunctionBegin;
907   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
908   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
913 {
914   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
915   PetscBLASInt   one = 1, lwork = 0;
916   const char    *ntype;
917   PetscScalar   *work = NULL, dummy;
918 
919   PetscFunctionBegin;
920   switch (type) {
921   case NORM_1:
922     ntype = "1";
923     lwork = PetscMax(a->locr, a->locc);
924     break;
925   case NORM_FROBENIUS:
926     ntype = "F";
927     work  = &dummy;
928     break;
929   case NORM_INFINITY:
930     ntype = "I";
931     lwork = PetscMax(a->locr, a->locc);
932     break;
933   default:
934     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
935   }
936   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
937   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
938   if (lwork) PetscCall(PetscFree(work));
939   PetscFunctionReturn(PETSC_SUCCESS);
940 }
941 
942 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
943 {
944   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
945 
946   PetscFunctionBegin;
947   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
952 {
953   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
954   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
955 
956   PetscFunctionBegin;
957   if (rows) {
958     n     = a->locr;
959     nb    = a->mb;
960     isrc  = a->rsrc;
961     nproc = a->grid->nprow;
962     iproc = a->grid->myrow;
963     PetscCall(PetscMalloc1(n, &idx));
964     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
965     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
966   }
967   if (cols) {
968     n     = a->locc;
969     nb    = a->nb;
970     isrc  = a->csrc;
971     nproc = a->grid->npcol;
972     iproc = a->grid->mycol;
973     PetscCall(PetscMalloc1(n, &idx));
974     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
975     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
976   }
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
981 {
982   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
983   Mat                Bmpi;
984   MPI_Comm           comm;
985   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
986   const PetscInt    *ranges, *branges, *cwork;
987   const PetscScalar *vwork;
988   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
989   PetscScalar       *barray;
990   PetscBool          differ = PETSC_FALSE;
991   PetscMPIInt        size;
992 
993   PetscFunctionBegin;
994   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
995   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
996 
997   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
998     PetscCallMPI(MPI_Comm_size(comm, &size));
999     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
1000     for (i = 0; i < size; i++)
1001       if (ranges[i + 1] != branges[i + 1]) {
1002         differ = PETSC_TRUE;
1003         break;
1004       }
1005   }
1006 
1007   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1008     PetscCall(MatCreate(comm, &Bmpi));
1009     m = PETSC_DECIDE;
1010     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1011     n = PETSC_DECIDE;
1012     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1013     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1014     PetscCall(MatSetType(Bmpi, MATDENSE));
1015     PetscCall(MatSetUp(Bmpi));
1016 
1017     /* create ScaLAPACK descriptor for B (1d block distribution) */
1018     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1019     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1020     lld = PetscMax(ldb, 1); /* local leading dimension */
1021     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1022     PetscCheckScaLapackInfo("descinit", info);
1023 
1024     /* redistribute matrix */
1025     PetscCall(MatDenseGetArray(Bmpi, &barray));
1026     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1027     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1028     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1030 
1031     /* transfer rows of auxiliary matrix to the final matrix B */
1032     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1033     for (i = rstart; i < rend; i++) {
1034       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1035       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1036       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1037     }
1038     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1039     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1040     PetscCall(MatDestroy(&Bmpi));
1041 
1042   } else { /* normal cases */
1043 
1044     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1045     else {
1046       PetscCall(MatCreate(comm, &Bmpi));
1047       m = PETSC_DECIDE;
1048       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1049       n = PETSC_DECIDE;
1050       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1051       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1052       PetscCall(MatSetType(Bmpi, MATDENSE));
1053       PetscCall(MatSetUp(Bmpi));
1054     }
1055 
1056     /* create ScaLAPACK descriptor for B (1d block distribution) */
1057     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1058     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1059     lld = PetscMax(ldb, 1); /* local leading dimension */
1060     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1061     PetscCheckScaLapackInfo("descinit", info);
1062 
1063     /* redistribute matrix */
1064     PetscCall(MatDenseGetArray(Bmpi, &barray));
1065     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1066     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1067 
1068     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1069     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1070     if (reuse == MAT_INPLACE_MATRIX) {
1071       PetscCall(MatHeaderReplace(A, &Bmpi));
1072     } else *B = Bmpi;
1073   }
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1078 {
1079   const PetscInt *ranges;
1080   PetscMPIInt     size;
1081   PetscInt        i, n;
1082 
1083   PetscFunctionBegin;
1084   *correct = PETSC_TRUE;
1085   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1086   if (size > 1) {
1087     PetscCall(PetscLayoutGetRanges(map, &ranges));
1088     n = ranges[1] - ranges[0];
1089     for (i = 1; i < size; i++)
1090       if (ranges[i + 1] - ranges[i] != n) break;
1091     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1092   }
1093   PetscFunctionReturn(PETSC_SUCCESS);
1094 }
1095 
1096 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1097 {
1098   Mat_ScaLAPACK     *b;
1099   Mat                Bmpi;
1100   MPI_Comm           comm;
1101   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1102   const PetscInt    *ranges, *rows, *cols;
1103   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1104   const PetscScalar *aarray;
1105   IS                 ir, ic;
1106   PetscInt           lda;
1107   PetscBool          flg;
1108 
1109   PetscFunctionBegin;
1110   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1111 
1112   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1113   else {
1114     PetscCall(MatCreate(comm, &Bmpi));
1115     m = PETSC_DECIDE;
1116     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1117     n = PETSC_DECIDE;
1118     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1119     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1120     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1121     PetscCall(MatSetUp(Bmpi));
1122   }
1123   b = (Mat_ScaLAPACK *)Bmpi->data;
1124 
1125   PetscCall(MatDenseGetLDA(A, &lda));
1126   PetscCall(MatDenseGetArrayRead(A, &aarray));
1127   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1128   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1129   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1130     /* create ScaLAPACK descriptor for A (1d block distribution) */
1131     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1132     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1133     lld = PetscMax(lda, 1);                       /* local leading dimension */
1134     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1135     PetscCheckScaLapackInfo("descinit", info);
1136 
1137     /* redistribute matrix */
1138     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1139     Bmpi->nooffprocentries = PETSC_TRUE;
1140   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1141     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1142     b->roworiented = PETSC_FALSE;
1143     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1144     PetscCall(ISGetIndices(ir, &rows));
1145     PetscCall(ISGetIndices(ic, &cols));
1146     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1147     PetscCall(ISRestoreIndices(ir, &rows));
1148     PetscCall(ISRestoreIndices(ic, &cols));
1149     PetscCall(ISDestroy(&ic));
1150     PetscCall(ISDestroy(&ir));
1151   }
1152   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1153   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1154   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1155   if (reuse == MAT_INPLACE_MATRIX) {
1156     PetscCall(MatHeaderReplace(A, &Bmpi));
1157   } else *B = Bmpi;
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1162 {
1163   Mat                mat_scal;
1164   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1165   const PetscInt    *cols;
1166   const PetscScalar *vals;
1167 
1168   PetscFunctionBegin;
1169   if (reuse == MAT_REUSE_MATRIX) {
1170     mat_scal = *newmat;
1171     PetscCall(MatZeroEntries(mat_scal));
1172   } else {
1173     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1174     m = PETSC_DECIDE;
1175     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1176     n = PETSC_DECIDE;
1177     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1178     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1179     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1180     PetscCall(MatSetUp(mat_scal));
1181   }
1182   for (row = rstart; row < rend; row++) {
1183     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1184     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1185     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1186   }
1187   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1188   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1189 
1190   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1191   else *newmat = mat_scal;
1192   PetscFunctionReturn(PETSC_SUCCESS);
1193 }
1194 
1195 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1196 {
1197   Mat                mat_scal;
1198   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1199   const PetscInt    *cols;
1200   const PetscScalar *vals;
1201   PetscScalar        v;
1202 
1203   PetscFunctionBegin;
1204   if (reuse == MAT_REUSE_MATRIX) {
1205     mat_scal = *newmat;
1206     PetscCall(MatZeroEntries(mat_scal));
1207   } else {
1208     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1209     m = PETSC_DECIDE;
1210     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1211     n = PETSC_DECIDE;
1212     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1213     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1214     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1215     PetscCall(MatSetUp(mat_scal));
1216   }
1217   PetscCall(MatGetRowUpperTriangular(A));
1218   for (row = rstart; row < rend; row++) {
1219     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1220     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1221     for (j = 0; j < ncols; j++) { /* lower triangular part */
1222       if (cols[j] == row) continue;
1223       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1224       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1225     }
1226     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1227   }
1228   PetscCall(MatRestoreRowUpperTriangular(A));
1229   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1230   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1231 
1232   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1233   else *newmat = mat_scal;
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236 
1237 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1238 {
1239   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1240   PetscInt       sz = 0;
1241 
1242   PetscFunctionBegin;
1243   PetscCall(PetscLayoutSetUp(A->rmap));
1244   PetscCall(PetscLayoutSetUp(A->cmap));
1245   if (!a->lld) a->lld = a->locr;
1246 
1247   PetscCall(PetscFree(a->loc));
1248   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1249   PetscCall(PetscCalloc1(sz, &a->loc));
1250 
1251   A->preallocated = PETSC_TRUE;
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1256 {
1257   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1258   Mat_ScaLAPACK_Grid *grid;
1259   PetscBool           flg;
1260   MPI_Comm            icomm;
1261 
1262   PetscFunctionBegin;
1263   PetscCall(MatStashDestroy_Private(&A->stash));
1264   PetscCall(PetscFree(a->loc));
1265   PetscCall(PetscFree(a->pivots));
1266   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1267   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1268   if (--grid->grid_refct == 0) {
1269     Cblacs_gridexit(grid->ictxt);
1270     Cblacs_gridexit(grid->ictxrow);
1271     Cblacs_gridexit(grid->ictxcol);
1272     PetscCall(PetscFree(grid));
1273     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1274   }
1275   PetscCall(PetscCommDestroy(&icomm));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1277   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1279   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1280   PetscCall(PetscFree(A->data));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1285 {
1286   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1287   PetscBLASInt   info = 0;
1288   PetscBool      flg;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscLayoutSetUp(A->rmap));
1292   PetscCall(PetscLayoutSetUp(A->cmap));
1293 
1294   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1295   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1296   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1297   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1298   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1299 
1300   /* compute local sizes */
1301   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1302   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1303   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1304   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1305   a->lld  = PetscMax(1, a->locr);
1306 
1307   /* allocate local array */
1308   PetscCall(MatScaLAPACKSetPreallocation(A));
1309 
1310   /* set up ScaLAPACK descriptor */
1311   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1312   PetscCheckScaLapackInfo("descinit", info);
1313   PetscFunctionReturn(PETSC_SUCCESS);
1314 }
1315 
1316 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1317 {
1318   PetscInt nstash, reallocs;
1319 
1320   PetscFunctionBegin;
1321   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1322   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1323   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1324   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1329 {
1330   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1331   PetscMPIInt    n;
1332   PetscInt       i, flg, *row, *col;
1333   PetscScalar   *val;
1334   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1335 
1336   PetscFunctionBegin;
1337   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1338   while (1) {
1339     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1340     if (!flg) break;
1341     for (i = 0; i < n; i++) {
1342       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1343       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1344       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1345       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1346       switch (A->insertmode) {
1347       case INSERT_VALUES:
1348         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1349         break;
1350       case ADD_VALUES:
1351         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1352         break;
1353       default:
1354         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1355       }
1356     }
1357   }
1358   PetscCall(MatStashScatterEnd_Private(&A->stash));
1359   PetscFunctionReturn(PETSC_SUCCESS);
1360 }
1361 
1362 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1363 {
1364   Mat      Adense, As;
1365   MPI_Comm comm;
1366 
1367   PetscFunctionBegin;
1368   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1369   PetscCall(MatCreate(comm, &Adense));
1370   PetscCall(MatSetType(Adense, MATDENSE));
1371   PetscCall(MatLoad(Adense, viewer));
1372   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1373   PetscCall(MatDestroy(&Adense));
1374   PetscCall(MatHeaderReplace(newMat, &As));
1375   PetscFunctionReturn(PETSC_SUCCESS);
1376 }
1377 
1378 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1379                                        NULL,
1380                                        NULL,
1381                                        MatMult_ScaLAPACK,
1382                                        /* 4*/ MatMultAdd_ScaLAPACK,
1383                                        MatMultTranspose_ScaLAPACK,
1384                                        MatMultTransposeAdd_ScaLAPACK,
1385                                        MatSolve_ScaLAPACK,
1386                                        MatSolveAdd_ScaLAPACK,
1387                                        NULL,
1388                                        /*10*/ NULL,
1389                                        MatLUFactor_ScaLAPACK,
1390                                        MatCholeskyFactor_ScaLAPACK,
1391                                        NULL,
1392                                        MatTranspose_ScaLAPACK,
1393                                        /*15*/ MatGetInfo_ScaLAPACK,
1394                                        NULL,
1395                                        MatGetDiagonal_ScaLAPACK,
1396                                        MatDiagonalScale_ScaLAPACK,
1397                                        MatNorm_ScaLAPACK,
1398                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1399                                        MatAssemblyEnd_ScaLAPACK,
1400                                        MatSetOption_ScaLAPACK,
1401                                        MatZeroEntries_ScaLAPACK,
1402                                        /*24*/ NULL,
1403                                        MatLUFactorSymbolic_ScaLAPACK,
1404                                        MatLUFactorNumeric_ScaLAPACK,
1405                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1406                                        MatCholeskyFactorNumeric_ScaLAPACK,
1407                                        /*29*/ MatSetUp_ScaLAPACK,
1408                                        NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        NULL,
1412                                        /*34*/ MatDuplicate_ScaLAPACK,
1413                                        NULL,
1414                                        NULL,
1415                                        NULL,
1416                                        NULL,
1417                                        /*39*/ MatAXPY_ScaLAPACK,
1418                                        NULL,
1419                                        NULL,
1420                                        NULL,
1421                                        MatCopy_ScaLAPACK,
1422                                        /*44*/ NULL,
1423                                        MatScale_ScaLAPACK,
1424                                        MatShift_ScaLAPACK,
1425                                        NULL,
1426                                        NULL,
1427                                        /*49*/ NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        NULL,
1432                                        /*54*/ NULL,
1433                                        NULL,
1434                                        NULL,
1435                                        NULL,
1436                                        NULL,
1437                                        /*59*/ NULL,
1438                                        MatDestroy_ScaLAPACK,
1439                                        MatView_ScaLAPACK,
1440                                        NULL,
1441                                        NULL,
1442                                        /*64*/ NULL,
1443                                        NULL,
1444                                        NULL,
1445                                        NULL,
1446                                        NULL,
1447                                        /*69*/ NULL,
1448                                        NULL,
1449                                        MatConvert_ScaLAPACK_Dense,
1450                                        NULL,
1451                                        NULL,
1452                                        /*74*/ NULL,
1453                                        NULL,
1454                                        NULL,
1455                                        NULL,
1456                                        NULL,
1457                                        /*79*/ NULL,
1458                                        NULL,
1459                                        NULL,
1460                                        NULL,
1461                                        MatLoad_ScaLAPACK,
1462                                        /*84*/ NULL,
1463                                        NULL,
1464                                        NULL,
1465                                        NULL,
1466                                        NULL,
1467                                        /*89*/ NULL,
1468                                        NULL,
1469                                        MatMatMultNumeric_ScaLAPACK,
1470                                        NULL,
1471                                        NULL,
1472                                        /*94*/ NULL,
1473                                        NULL,
1474                                        NULL,
1475                                        MatMatTransposeMultNumeric_ScaLAPACK,
1476                                        NULL,
1477                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1478                                        NULL,
1479                                        NULL,
1480                                        MatConjugate_ScaLAPACK,
1481                                        NULL,
1482                                        /*104*/ NULL,
1483                                        NULL,
1484                                        NULL,
1485                                        NULL,
1486                                        NULL,
1487                                        /*109*/ MatMatSolve_ScaLAPACK,
1488                                        NULL,
1489                                        NULL,
1490                                        NULL,
1491                                        MatMissingDiagonal_ScaLAPACK,
1492                                        /*114*/ NULL,
1493                                        NULL,
1494                                        NULL,
1495                                        NULL,
1496                                        NULL,
1497                                        /*119*/ NULL,
1498                                        MatHermitianTranspose_ScaLAPACK,
1499                                        MatMultHermitianTranspose_ScaLAPACK,
1500                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1501                                        NULL,
1502                                        /*124*/ NULL,
1503                                        NULL,
1504                                        NULL,
1505                                        NULL,
1506                                        NULL,
1507                                        /*129*/ NULL,
1508                                        NULL,
1509                                        NULL,
1510                                        MatTransposeMatMultNumeric_ScaLAPACK,
1511                                        NULL,
1512                                        /*134*/ NULL,
1513                                        NULL,
1514                                        NULL,
1515                                        NULL,
1516                                        NULL,
1517                                        NULL,
1518                                        /*140*/ NULL,
1519                                        NULL,
1520                                        NULL,
1521                                        NULL,
1522                                        NULL,
1523                                        /*145*/ NULL,
1524                                        NULL,
1525                                        NULL,
1526                                        NULL,
1527                                        NULL,
1528                                        /*150*/ NULL,
1529                                        NULL,
1530                                        NULL,
1531                                        NULL};
1532 
1533 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1534 {
1535   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1536   PetscInt           size = stash->size, nsends;
1537   PetscInt           count, *sindices, **rindices, i, j, l;
1538   PetscScalar      **rvalues, *svalues;
1539   MPI_Comm           comm = stash->comm;
1540   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1541   PetscMPIInt       *sizes, *nlengths, nreceives;
1542   PetscInt          *sp_idx, *sp_idy;
1543   PetscScalar       *sp_val;
1544   PetscMatStashSpace space, space_next;
1545   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1546   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1547 
1548   PetscFunctionBegin;
1549   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1550     InsertMode addv;
1551     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1552     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1553     mat->insertmode = addv; /* in case this processor had no cache */
1554   }
1555 
1556   bs2 = stash->bs * stash->bs;
1557 
1558   /*  first count number of contributors to each processor */
1559   PetscCall(PetscCalloc1(size, &nlengths));
1560   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1561 
1562   i = j = 0;
1563   space = stash->space_head;
1564   while (space) {
1565     space_next = space->next;
1566     for (l = 0; l < space->local_used; l++) {
1567       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1568       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1569       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1570       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1571       nlengths[j]++;
1572       owner[i] = j;
1573       i++;
1574     }
1575     space = space_next;
1576   }
1577 
1578   /* Now check what procs get messages - and compute nsends. */
1579   PetscCall(PetscCalloc1(size, &sizes));
1580   for (i = 0, nsends = 0; i < size; i++) {
1581     if (nlengths[i]) {
1582       sizes[i] = 1;
1583       nsends++;
1584     }
1585   }
1586 
1587   {
1588     PetscMPIInt *onodes, *olengths;
1589     /* Determine the number of messages to expect, their lengths, from from-ids */
1590     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1591     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1592     /* since clubbing row,col - lengths are multiplied by 2 */
1593     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1594     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1595     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1596     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1597     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1598     PetscCall(PetscFree(onodes));
1599     PetscCall(PetscFree(olengths));
1600   }
1601 
1602   /* do sends:
1603       1) starts[i] gives the starting index in svalues for stuff going to
1604          the ith processor
1605   */
1606   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1607   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1608   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1609   /* use 2 sends the first with all_a, the next with all_i and all_j */
1610   startv[0] = 0;
1611   starti[0] = 0;
1612   for (i = 1; i < size; i++) {
1613     startv[i] = startv[i - 1] + nlengths[i - 1];
1614     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1615   }
1616 
1617   i     = 0;
1618   space = stash->space_head;
1619   while (space) {
1620     space_next = space->next;
1621     sp_idx     = space->idx;
1622     sp_idy     = space->idy;
1623     sp_val     = space->val;
1624     for (l = 0; l < space->local_used; l++) {
1625       j = owner[i];
1626       if (bs2 == 1) {
1627         svalues[startv[j]] = sp_val[l];
1628       } else {
1629         PetscInt     k;
1630         PetscScalar *buf1, *buf2;
1631         buf1 = svalues + bs2 * startv[j];
1632         buf2 = space->val + bs2 * l;
1633         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1634       }
1635       sindices[starti[j]]               = sp_idx[l];
1636       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1637       startv[j]++;
1638       starti[j]++;
1639       i++;
1640     }
1641     space = space_next;
1642   }
1643   startv[0] = 0;
1644   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1645 
1646   for (i = 0, count = 0; i < size; i++) {
1647     if (sizes[i]) {
1648       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1649       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1650     }
1651   }
1652 #if defined(PETSC_USE_INFO)
1653   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1654   for (i = 0; i < size; i++) {
1655     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1656   }
1657 #endif
1658   PetscCall(PetscFree(nlengths));
1659   PetscCall(PetscFree(owner));
1660   PetscCall(PetscFree2(startv, starti));
1661   PetscCall(PetscFree(sizes));
1662 
1663   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1664   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1665 
1666   for (i = 0; i < nreceives; i++) {
1667     recv_waits[2 * i]     = recv_waits1[i];
1668     recv_waits[2 * i + 1] = recv_waits2[i];
1669   }
1670   stash->recv_waits = recv_waits;
1671 
1672   PetscCall(PetscFree(recv_waits1));
1673   PetscCall(PetscFree(recv_waits2));
1674 
1675   stash->svalues         = svalues;
1676   stash->sindices        = sindices;
1677   stash->rvalues         = rvalues;
1678   stash->rindices        = rindices;
1679   stash->send_waits      = send_waits;
1680   stash->nsends          = nsends;
1681   stash->nrecvs          = nreceives;
1682   stash->reproduce_count = 0;
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1687 {
1688   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1689 
1690   PetscFunctionBegin;
1691   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1692   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1693   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1694   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1695   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1696   PetscFunctionReturn(PETSC_SUCCESS);
1697 }
1698 
1699 /*@
1700   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1701   the `MATSCALAPACK` matrix
1702 
1703   Logically Collective
1704 
1705   Input Parameters:
1706 + A  - a `MATSCALAPACK` matrix
1707 . mb - the row block size
1708 - nb - the column block size
1709 
1710   Level: intermediate
1711 
1712   Note:
1713   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1714 
1715 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1716 @*/
1717 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1718 {
1719   PetscFunctionBegin;
1720   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1721   PetscValidLogicalCollectiveInt(A, mb, 2);
1722   PetscValidLogicalCollectiveInt(A, nb, 3);
1723   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1724   PetscFunctionReturn(PETSC_SUCCESS);
1725 }
1726 
1727 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1728 {
1729   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1730 
1731   PetscFunctionBegin;
1732   if (mb) *mb = a->mb;
1733   if (nb) *nb = a->nb;
1734   PetscFunctionReturn(PETSC_SUCCESS);
1735 }
1736 
1737 /*@
1738   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1739   the `MATSCALAPACK` matrix
1740 
1741   Not Collective
1742 
1743   Input Parameter:
1744 . A - a `MATSCALAPACK` matrix
1745 
1746   Output Parameters:
1747 + mb - the row block size
1748 - nb - the column block size
1749 
1750   Level: intermediate
1751 
1752   Note:
1753   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1754 
1755 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1756 @*/
1757 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1758 {
1759   PetscFunctionBegin;
1760   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1761   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1762   PetscFunctionReturn(PETSC_SUCCESS);
1763 }
1764 
1765 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1766 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1767 
1768 /*MC
1769    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1770 
1771    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1772 
1773    Options Database Keys:
1774 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1775 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1776 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1777 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1778 
1779    Level: intermediate
1780 
1781   Note:
1782    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1783    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1784    the given rank.
1785 
1786 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1787 M*/
1788 
1789 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1790 {
1791   Mat_ScaLAPACK      *a;
1792   PetscBool           flg, flg1;
1793   Mat_ScaLAPACK_Grid *grid;
1794   MPI_Comm            icomm;
1795   PetscBLASInt        nprow, npcol, myrow, mycol;
1796   PetscInt            optv1, k = 2, array[2] = {0, 0};
1797   PetscMPIInt         size;
1798 
1799   PetscFunctionBegin;
1800   A->ops[0]     = MatOps_Values;
1801   A->insertmode = NOT_SET_VALUES;
1802 
1803   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1804   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1805   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1806   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1807   A->stash.ScatterDestroy = NULL;
1808 
1809   PetscCall(PetscNew(&a));
1810   A->data = (void *)a;
1811 
1812   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1813   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1814     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1815     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1816     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1817   }
1818   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1819   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1820   if (!flg) {
1821     PetscCall(PetscNew(&grid));
1822 
1823     PetscCallMPI(MPI_Comm_size(icomm, &size));
1824     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1825 
1826     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1827     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1828     if (flg1) {
1829       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1830       grid->nprow = optv1;
1831     }
1832     PetscOptionsEnd();
1833 
1834     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1835     grid->npcol = size / grid->nprow;
1836     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1837     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1838     grid->ictxt = Csys2blacs_handle(icomm);
1839     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1840     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1841     grid->grid_refct = 1;
1842     grid->nprow      = nprow;
1843     grid->npcol      = npcol;
1844     grid->myrow      = myrow;
1845     grid->mycol      = mycol;
1846     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1847     grid->ictxrow = Csys2blacs_handle(icomm);
1848     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1849     grid->ictxcol = Csys2blacs_handle(icomm);
1850     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1851     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1852 
1853   } else grid->grid_refct++;
1854   PetscCall(PetscCommDestroy(&icomm));
1855   a->grid = grid;
1856   a->mb   = DEFAULT_BLOCKSIZE;
1857   a->nb   = DEFAULT_BLOCKSIZE;
1858 
1859   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1860   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1861   if (flg) {
1862     a->mb = array[0];
1863     a->nb = (k > 1) ? array[1] : a->mb;
1864   }
1865   PetscOptionsEnd();
1866 
1867   a->roworiented = PETSC_TRUE;
1868   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1869   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1870   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1871   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1872   PetscFunctionReturn(PETSC_SUCCESS);
1873 }
1874 
1875 /*@C
1876   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1877   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1878 
1879   Collective
1880 
1881   Input Parameters:
1882 + comm - MPI communicator
1883 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1884 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1885 . M    - number of global rows
1886 . N    - number of global columns
1887 . rsrc - coordinate of process that owns the first row of the distributed matrix
1888 - csrc - coordinate of process that owns the first column of the distributed matrix
1889 
1890   Output Parameter:
1891 . A - the matrix
1892 
1893   Options Database Key:
1894 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1895 
1896   Level: intermediate
1897 
1898   Notes:
1899   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1900 
1901   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1902   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1903   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1904 
1905   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1906   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1907   significance and are thus ignored. The block sizes refer to the values
1908   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1909 
1910 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1911 @*/
1912 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1913 {
1914   Mat_ScaLAPACK *a;
1915   PetscInt       m, n;
1916 
1917   PetscFunctionBegin;
1918   PetscCall(MatCreate(comm, A));
1919   PetscCall(MatSetType(*A, MATSCALAPACK));
1920   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1921   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1922   m = PETSC_DECIDE;
1923   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1924   n = PETSC_DECIDE;
1925   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1926   PetscCall(MatSetSizes(*A, m, n, M, N));
1927   a = (Mat_ScaLAPACK *)(*A)->data;
1928   PetscCall(PetscBLASIntCast(M, &a->M));
1929   PetscCall(PetscBLASIntCast(N, &a->N));
1930   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1931   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1932   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1933   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1934   PetscCall(MatSetUp(*A));
1935   PetscFunctionReturn(PETSC_SUCCESS);
1936 }
1937