xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision eb9d3e4d1b0d07ce7eca2be9429d4698ffa7ae7f)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
/* BibTeX entry shown to users who are asked to cite ScaLAPACK; registered once
   via the ScaLAPACKCite flag below (the PetscCitationsRegister() call is
   presumably elsewhere in this file -- not visible in this chunk) */
const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
                                       "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
                                       "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
                                       "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
                                       "       TITLE = {Sca{LAPACK} Users' Guide},\n"
                                       "       PUBLISHER = {SIAM},\n"
                                       "       ADDRESS = {Philadelphia, PA},\n"
                                       "       YEAR = 1997\n"
                                       "}\n";
static PetscBool ScaLAPACKCite       = PETSC_FALSE; /* set once the citation has been registered */

/* default block size for the 2d block-cyclic distribution when none is given */
#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/* Release the MPI attribute key used to cache the ScaLAPACK grid on a
   communicator; presumably registered for execution at PetscFinalize() time
   (the registration is not visible in this chunk -- confirm). */
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(PETSC_SUCCESS);
}
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
/*
   Insert (INSERT_VALUES) or accumulate (ADD_VALUES) an nr x nc block of values.

   Global (row,col) indices are converted to 1-based and mapped with infog2l()
   to local indices plus the coordinates of the owning process in the 2d
   block-cyclic grid.  Locally owned entries are written straight into the
   local array; off-process entries are queued in the matrix stash and
   communicated at assembly time.  Negative row/column indices are skipped.
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* 1-based global row index */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); /* 1-based global column index */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        /* locally owned entry: vals is addressed row- or column-major depending on MAT_ROW_ORIENTED */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
            break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
            break;
          }
        }
      } else {
        /* off-process entry: stash it for assembly, unless the user promised there would be none */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
158 
/*
   Common kernel for the MatMult variants: computes y <- op(A)*x + beta*y,
   where op(A) = A (transpose == PETSC_FALSE), A^T (transpose, !hermitian) or
   A^H (transpose && hermitian).

   x and y arrive in PETSc's 1d per-process layout, but PBLASgemv needs them
   distributed over the 2d process grid; descriptors are therefore built for
   both layouts and p?gemr2d redistributes x (and y, when beta != 0) before
   the product and y back afterwards.  When beta == 0 the incoming contents
   of y are never read.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine */
    if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
    else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(PETSC_SUCCESS);
}
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
/* Symbolic stage of C = A*B: set C's sizes and type and record the numeric
   routine for the MatProduct framework.  The fill argument is ignored since
   the result is dense. */
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
357 
358 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/* Configure C for the MATPRODUCT_AB case (C = A*B): hook the type-specific
   symbolic routine and the generic AB product driver into C's ops table. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
388 
/* Configure C for the MATPRODUCT_ABt case (C = A*B^T): hook the type-specific
   symbolic routine and the generic ABt product driver into C's ops table. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
396 
397 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
398 {
399   Mat_Product *product = C->product;
400 
401   PetscFunctionBegin;
402   switch (product->type) {
403   case MATPRODUCT_AB:
404     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
405     break;
406   case MATPRODUCT_ABt:
407     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
408     break;
409   default:
410     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
411   }
412   PetscFunctionReturn(PETSC_SUCCESS);
413 }
414 
/*
   Extract the main diagonal of A into vector D.

   The diagonal entries are gathered element-by-element (elget/elset) into a
   2d-distributed vector and then redistributed with p?gemr2d into D's 1d
   layout.  When the matrix has at least as many columns as rows the diagonal
   is treated as a column vector over the process-column context ("row
   version"); otherwise as a row vector over the process-row context
   ("column version").
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(PETSC_SUCCESS);
}
486 
/*
   A <- diag(L) * A * diag(R).  Either vector may be NULL to skip that side.

   Each scaling vector is redistributed from its 1d PETSc layout onto the 2d
   grid (R as a single row, L as a single column), broadcast so every process
   has the factors matching its local block, and then applied entrywise to
   the local array.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R, &d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns */
    /* NOTE(review): Cdgebs2d/Cdgebr2d are the double-precision BLACS
       broadcasts applied to a PetscScalar buffer of length lszd -- confirm
       this is intended in complex builds */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling: column j of the local block is scaled by its factor */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, &d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L, &d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling: row i of the local block is scaled by its factor */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, &d));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
561 
/* A dense (ScaLAPACK) matrix stores every entry explicitly, so a diagonal
   entry is never structurally missing; the optional index output d is left
   untouched. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
568 
569 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
570 {
571   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
572   PetscBLASInt   n, one = 1;
573 
574   PetscFunctionBegin;
575   n = x->lld * x->locc;
576   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
577   PetscFunctionReturn(PETSC_SUCCESS);
578 }
579 
580 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
581 {
582   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
583   PetscBLASInt   i, n;
584   PetscScalar    v;
585 
586   PetscFunctionBegin;
587   n = PetscMin(x->M, x->N);
588   for (i = 1; i <= n; i++) {
589     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
590     v += alpha;
591     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
592   }
593   PetscFunctionReturn(PETSC_SUCCESS);
594 }
595 
596 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
597 {
598   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
599   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
600   PetscBLASInt   one  = 1;
601   PetscScalar    beta = 1.0;
602 
603   PetscFunctionBegin;
604   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
605   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
606   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
607   PetscFunctionReturn(PETSC_SUCCESS);
608 }
609 
610 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
611 {
612   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
613   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
614 
615   PetscFunctionBegin;
616   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
617   PetscCall(PetscObjectStateIncrease((PetscObject)B));
618   PetscFunctionReturn(PETSC_SUCCESS);
619 }
620 
621 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
622 {
623   Mat            Bs;
624   MPI_Comm       comm;
625   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
626 
627   PetscFunctionBegin;
628   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
629   PetscCall(MatCreate(comm, &Bs));
630   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
631   PetscCall(MatSetType(Bs, MATSCALAPACK));
632   b       = (Mat_ScaLAPACK *)Bs->data;
633   b->M    = a->M;
634   b->N    = a->N;
635   b->mb   = a->mb;
636   b->nb   = a->nb;
637   b->rsrc = a->rsrc;
638   b->csrc = a->csrc;
639   PetscCall(MatSetUp(Bs));
640   *B = Bs;
641   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
642   Bs->assembled = PETSC_TRUE;
643   PetscFunctionReturn(PETSC_SUCCESS);
644 }
645 
/*
   B = A^T, allocating the result.  Only MAT_INITIAL_MATRIX is supported (the
   MAT_REUSE_MATRIX branch only validates the nonzero state before the
   PetscCheck rejects it).  Block sizes and source-process coordinates are
   swapped for the transposed layout.  In complex builds the underlying
   PBLAStran_ routine conjugates, so the conjugation is undone afterwards.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
671 
672 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
673 {
674   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
675   PetscInt       i, j;
676 
677   PetscFunctionBegin;
678   for (i = 0; i < a->locr; i++)
679     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
680   PetscFunctionReturn(PETSC_SUCCESS);
681 }
682 
/*
   B = A^H (conjugate transpose); only MAT_INITIAL_MATRIX is supported.
   Uses the same PBLAStran_ call as MatTranspose_ScaLAPACK but keeps its
   conjugation (cf. the "undo conjugation" loop there); in real builds this
   coincides with the ordinary transpose.
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
699 
static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  /* Solve A x = b using the factors previously stored in A (LU or Cholesky).
     The 1d block-distributed PETSc vector is redistributed onto the 2d
     block-cyclic process grid, solved there, and redistributed back. */
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size = size owned by rank 0 */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector: local size given by numroc for this process row */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call the ScaLAPACK triangular-solve subroutine matching the factorization */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix back to the 1d layout */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
751 
752 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
753 {
754   PetscFunctionBegin;
755   PetscCall(MatSolve_ScaLAPACK(A, B, X));
756   PetscCall(VecAXPY(X, 1, Y));
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
761 {
762   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
763   PetscBool      flg1, flg2;
764   PetscBLASInt   one = 1, info;
765 
766   PetscFunctionBegin;
767   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
768   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
769   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
770   MatScaLAPACKCheckDistribution(B, 1, X, 2);
771   b = (Mat_ScaLAPACK *)B->data;
772   x = (Mat_ScaLAPACK *)X->data;
773   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
774 
775   switch (A->factortype) {
776   case MAT_FACTOR_LU:
777     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
778     PetscCheckScaLapackInfo("getrs", info);
779     break;
780   case MAT_FACTOR_CHOLESKY:
781     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
782     PetscCheckScaLapackInfo("potrs", info);
783     break;
784   default:
785     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
786   }
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* In-place LU factorization with partial pivoting (p?getrf); the row/col
     index sets and factorinfo are ignored by this implementation. */
  /* pivot array allocated lazily; sized locr+mb as required by p?getrf */
  if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  /* record which solver produced the factors */
  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
806 
807 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
808 {
809   PetscFunctionBegin;
810   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
811   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* Intentionally a no-op: dense factorizations need no symbolic analysis.
     F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
821 
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* In-place Cholesky factorization of the lower triangle (p?potrf with uplo="L");
     the permutation and factorinfo arguments are ignored by this implementation. */
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record which solver produced the factors */
  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
837 
838 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
839 {
840   PetscFunctionBegin;
841   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
842   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
843   PetscFunctionReturn(PETSC_SUCCESS);
844 }
845 
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* Intentionally a no-op: dense factorizations need no symbolic analysis.
     F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
852 
static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  /* Query callback composed on factor matrices: reports the solver package name */
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
859 
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix: same global sizes, block sizes and grid
     origin as A, so MatCopy(A,F,...) in the numeric phase is a plain array copy */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* symbolic factorization is a no-op for dense matrices */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
877 
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  /* Register ScaLAPACK as an LU and Cholesky solver package for MATSCALAPACK matrices */
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(PETSC_SUCCESS);
}
885 
886 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
887 {
888   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
889   PetscBLASInt   one = 1, lwork = 0;
890   const char    *ntype;
891   PetscScalar   *work = NULL, dummy;
892 
893   PetscFunctionBegin;
894   switch (type) {
895   case NORM_1:
896     ntype = "1";
897     lwork = PetscMax(a->locr, a->locc);
898     break;
899   case NORM_FROBENIUS:
900     ntype = "F";
901     work  = &dummy;
902     break;
903   case NORM_INFINITY:
904     ntype = "I";
905     lwork = PetscMax(a->locr, a->locc);
906     break;
907   default:
908     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
909   }
910   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
911   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
912   if (lwork) PetscCall(PetscFree(work));
913   PetscFunctionReturn(PETSC_SUCCESS);
914 }
915 
916 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
917 {
918   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
919 
920   PetscFunctionBegin;
921   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
922   PetscFunctionReturn(PETSC_SUCCESS);
923 }
924 
925 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
926 {
927   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
928   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
929 
930   PetscFunctionBegin;
931   if (rows) {
932     n     = a->locr;
933     nb    = a->mb;
934     isrc  = a->rsrc;
935     nproc = a->grid->nprow;
936     iproc = a->grid->myrow;
937     PetscCall(PetscMalloc1(n, &idx));
938     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
939     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
940   }
941   if (cols) {
942     n     = a->locc;
943     nb    = a->nb;
944     isrc  = a->csrc;
945     nproc = a->grid->npcol;
946     iproc = a->grid->mycol;
947     PetscCall(PetscMalloc1(n, &idx));
948     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
949     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
950   }
951   PetscFunctionReturn(PETSC_SUCCESS);
952 }
953 
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  /* Convert the 2d block-cyclic ScaLAPACK matrix A to a 1d row-distributed
     dense matrix, using p?gemr2d for the redistribution. */
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* the reused B has a row layout gemr2d cannot target directly, so
       redistribute into a temporary dense matrix with the expected layout */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    PetscCall(MatDenseGetLDA(Bmpi, &ldb));
    lld = PetscMax(ldb, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    PetscCall(MatDenseGetLDA(Bmpi, &ldb));
    lld = PetscMax(ldb, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1050 
1051 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1052 {
1053   const PetscInt *ranges;
1054   PetscMPIInt     size;
1055   PetscInt        i, n;
1056 
1057   PetscFunctionBegin;
1058   *correct = PETSC_TRUE;
1059   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1060   if (size > 1) {
1061     PetscCall(PetscLayoutGetRanges(map, &ranges));
1062     n = ranges[1] - ranges[0];
1063     for (i = 1; i < size; i++)
1064       if (ranges[i + 1] - ranges[i] != n) break;
1065     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1066   }
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *b;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt    *ranges, *rows, *cols;
  PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
  const PetscScalar *aarray;
  IS                 ir, ic;
  PetscInt           lda;
  PetscBool          flg;

  PetscFunctionBegin;
  /* Convert a 1d row-distributed dense matrix to the 2d block-cyclic ScaLAPACK
     layout: via p?gemr2d when the dense layout is compatible, otherwise via
     MatSetValues() with the stash handling off-process entries. */
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArrayRead(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE; /* gemr2d placed everything; assembly needs no communication */
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* the dense array is column-major */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArrayRead(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1134 
1135 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1136 {
1137   Mat                mat_scal;
1138   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1139   const PetscInt    *cols;
1140   const PetscScalar *vals;
1141 
1142   PetscFunctionBegin;
1143   if (reuse == MAT_REUSE_MATRIX) {
1144     mat_scal = *newmat;
1145     PetscCall(MatZeroEntries(mat_scal));
1146   } else {
1147     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1148     m = PETSC_DECIDE;
1149     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1150     n = PETSC_DECIDE;
1151     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1152     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1153     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1154     PetscCall(MatSetUp(mat_scal));
1155   }
1156   for (row = rstart; row < rend; row++) {
1157     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1158     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1159     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1160   }
1161   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1162   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1163 
1164   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1165   else *newmat = mat_scal;
1166   PetscFunctionReturn(PETSC_SUCCESS);
1167 }
1168 
1169 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1170 {
1171   Mat                mat_scal;
1172   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1173   const PetscInt    *cols;
1174   const PetscScalar *vals;
1175   PetscScalar        v;
1176 
1177   PetscFunctionBegin;
1178   if (reuse == MAT_REUSE_MATRIX) {
1179     mat_scal = *newmat;
1180     PetscCall(MatZeroEntries(mat_scal));
1181   } else {
1182     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1183     m = PETSC_DECIDE;
1184     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1185     n = PETSC_DECIDE;
1186     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1187     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1188     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1189     PetscCall(MatSetUp(mat_scal));
1190   }
1191   PetscCall(MatGetRowUpperTriangular(A));
1192   for (row = rstart; row < rend; row++) {
1193     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1194     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1195     for (j = 0; j < ncols; j++) { /* lower triangular part */
1196       if (cols[j] == row) continue;
1197       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1198       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1199     }
1200     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1201   }
1202   PetscCall(MatRestoreRowUpperTriangular(A));
1203   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1204   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1205 
1206   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1207   else *newmat = mat_scal;
1208   PetscFunctionReturn(PETSC_SUCCESS);
1209 }
1210 
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* default leading dimension is the number of locally owned rows */
  if (!a->lld) a->lld = a->locr;

  /* (re)allocate the zero-initialized local array; PetscIntMultError guards
     the lld*locc product against integer overflow */
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1228 
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* the BLACS grid is shared by all ScaLAPACK matrices on this communicator,
     attached as an MPI attribute (Petsc_ScaLAPACK_keyval) to the PETSc inner comm */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    /* last matrix using this grid: release all three BLACS contexts and the attribute */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  /* remove composed query/set functions before freeing the implementation data */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1257 
static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes of the 2d block-cyclic distribution via numroc */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1289 
static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
{
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  /* nothing to communicate when all values were set on the owning process */
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  /* start scattering stashed off-process entries to their owners */
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1301 
static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  /* drain the stash: each received (row,col,val) triple is mapped to local
     indices with infog2l and written into the local block-cyclic array */
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      /* infog2l takes 1-based global indices and returns 1-based local indices */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1335 
static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
{
  Mat      Adense, As;
  MPI_Comm comm;

  PetscFunctionBegin;
  /* load the data into an intermediate MATDENSE matrix, then convert it to the
     ScaLAPACK block-cyclic layout */
  PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
  PetscCall(MatCreate(comm, &Adense));
  PetscCall(MatSetType(Adense, MATDENSE));
  PetscCall(MatLoad(Adense, viewer));
  PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
  PetscCall(MatDestroy(&Adense));
  /* swap the converted matrix into the caller's handle */
  PetscCall(MatHeaderReplace(newMat, &As));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1351 
1352 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1353                                        0,
1354                                        0,
1355                                        MatMult_ScaLAPACK,
1356                                        /* 4*/ MatMultAdd_ScaLAPACK,
1357                                        MatMultTranspose_ScaLAPACK,
1358                                        MatMultTransposeAdd_ScaLAPACK,
1359                                        MatSolve_ScaLAPACK,
1360                                        MatSolveAdd_ScaLAPACK,
1361                                        0,
1362                                        /*10*/ 0,
1363                                        MatLUFactor_ScaLAPACK,
1364                                        MatCholeskyFactor_ScaLAPACK,
1365                                        0,
1366                                        MatTranspose_ScaLAPACK,
1367                                        /*15*/ MatGetInfo_ScaLAPACK,
1368                                        0,
1369                                        MatGetDiagonal_ScaLAPACK,
1370                                        MatDiagonalScale_ScaLAPACK,
1371                                        MatNorm_ScaLAPACK,
1372                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1373                                        MatAssemblyEnd_ScaLAPACK,
1374                                        MatSetOption_ScaLAPACK,
1375                                        MatZeroEntries_ScaLAPACK,
1376                                        /*24*/ 0,
1377                                        MatLUFactorSymbolic_ScaLAPACK,
1378                                        MatLUFactorNumeric_ScaLAPACK,
1379                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1380                                        MatCholeskyFactorNumeric_ScaLAPACK,
1381                                        /*29*/ MatSetUp_ScaLAPACK,
1382                                        0,
1383                                        0,
1384                                        0,
1385                                        0,
1386                                        /*34*/ MatDuplicate_ScaLAPACK,
1387                                        0,
1388                                        0,
1389                                        0,
1390                                        0,
1391                                        /*39*/ MatAXPY_ScaLAPACK,
1392                                        0,
1393                                        0,
1394                                        0,
1395                                        MatCopy_ScaLAPACK,
1396                                        /*44*/ 0,
1397                                        MatScale_ScaLAPACK,
1398                                        MatShift_ScaLAPACK,
1399                                        0,
1400                                        0,
1401                                        /*49*/ 0,
1402                                        0,
1403                                        0,
1404                                        0,
1405                                        0,
1406                                        /*54*/ 0,
1407                                        0,
1408                                        0,
1409                                        0,
1410                                        0,
1411                                        /*59*/ 0,
1412                                        MatDestroy_ScaLAPACK,
1413                                        MatView_ScaLAPACK,
1414                                        0,
1415                                        0,
1416                                        /*64*/ 0,
1417                                        0,
1418                                        0,
1419                                        0,
1420                                        0,
1421                                        /*69*/ 0,
1422                                        0,
1423                                        MatConvert_ScaLAPACK_Dense,
1424                                        0,
1425                                        0,
1426                                        /*74*/ 0,
1427                                        0,
1428                                        0,
1429                                        0,
1430                                        0,
1431                                        /*79*/ 0,
1432                                        0,
1433                                        0,
1434                                        0,
1435                                        MatLoad_ScaLAPACK,
1436                                        /*84*/ 0,
1437                                        0,
1438                                        0,
1439                                        0,
1440                                        0,
1441                                        /*89*/ 0,
1442                                        0,
1443                                        MatMatMultNumeric_ScaLAPACK,
1444                                        0,
1445                                        0,
1446                                        /*94*/ 0,
1447                                        0,
1448                                        0,
1449                                        MatMatTransposeMultNumeric_ScaLAPACK,
1450                                        0,
1451                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1452                                        0,
1453                                        0,
1454                                        MatConjugate_ScaLAPACK,
1455                                        0,
1456                                        /*104*/ 0,
1457                                        0,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                        /*109*/ MatMatSolve_ScaLAPACK,
1462                                        0,
1463                                        0,
1464                                        0,
1465                                        MatMissingDiagonal_ScaLAPACK,
1466                                        /*114*/ 0,
1467                                        0,
1468                                        0,
1469                                        0,
1470                                        0,
1471                                        /*119*/ 0,
1472                                        MatHermitianTranspose_ScaLAPACK,
1473                                        MatMultHermitianTranspose_ScaLAPACK,
1474                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1475                                        0,
1476                                        /*124*/ 0,
1477                                        0,
1478                                        0,
1479                                        0,
1480                                        0,
1481                                        /*129*/ 0,
1482                                        0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                        /*134*/ 0,
1487                                        0,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                        /*140*/ 0,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                        /*145*/ 0,
1498                                        0,
1499                                        0,
1500                                        0,
1501                                        0,
1502                                        /*150*/ 0,
1503                                        0};
1504 
/*
   MatStashScatterBegin_ScaLAPACK - begins communicating the stashed (set off-process)
   matrix entries of a MATSCALAPACK matrix to the ranks that own them.

   The destination rank of each stashed (row,col) entry is obtained from ScaLAPACK's
   2D block-cyclic layout (SCALAPACKinfog2l + Cblacs_pnum), not from contiguous row
   ownership, so the owners[] argument is unused here (it is kept only to match the
   MatStash ScatterBegin callback signature).

   All sends and receives are nonblocking; the posted receives are drained later
   through stash->ScatterGetMesg (set to MatStashScatterGetMesg_Ref).
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs; /* number of scalar values carried per stashed entry */

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* map the global (row,col) index (converted to 1-based for ScaLAPACK) to the owning grid process */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); /* linear rank of grid process (rsrc,csrc) */
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* pack values and (row,col) indices into the send buffers, bucketed by owner */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices go in the first half of each destination's index segment, column indices in the second */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* the packing loop above advanced startv[]; rebuild the per-destination offsets before posting sends */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  /* interleave the index and value receive requests, index request first */
  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* the stash keeps ownership of all buffers until the scatter completes */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1657 
1658 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1659 {
1660   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1661 
1662   PetscFunctionBegin;
1663   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1664   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1665   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1666   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1667   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1668   PetscFunctionReturn(PETSC_SUCCESS);
1669 }
1670 
1671 /*@
1672   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1673   the `MATSCALAPACK` matrix
1674 
1675   Logically Collective
1676 
1677   Input Parameters:
1678 + A  - a `MATSCALAPACK` matrix
1679 . mb - the row block size
1680 - nb - the column block size
1681 
1682   Level: intermediate
1683 
1684   Note:
1685   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1686 
1687 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1688 @*/
1689 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1690 {
1691   PetscFunctionBegin;
1692   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1693   PetscValidLogicalCollectiveInt(A, mb, 2);
1694   PetscValidLogicalCollectiveInt(A, nb, 3);
1695   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1696   PetscFunctionReturn(PETSC_SUCCESS);
1697 }
1698 
1699 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1700 {
1701   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1702 
1703   PetscFunctionBegin;
1704   if (mb) *mb = a->mb;
1705   if (nb) *nb = a->nb;
1706   PetscFunctionReturn(PETSC_SUCCESS);
1707 }
1708 
1709 /*@
1710   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1711   the `MATSCALAPACK` matrix
1712 
1713   Not Collective
1714 
1715   Input Parameter:
1716 . A - a `MATSCALAPACK` matrix
1717 
1718   Output Parameters:
1719 + mb - the row block size
1720 - nb - the column block size
1721 
1722   Level: intermediate
1723 
1724   Note:
1725   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1726 
1727 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1728 @*/
1729 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1730 {
1731   PetscFunctionBegin;
1732   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1733   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1734   PetscFunctionReturn(PETSC_SUCCESS);
1735 }
1736 
1737 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1738 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1739 
1740 /*MC
1741    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1742 
1743    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1744 
1745    Options Database Keys:
1746 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1747 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1748 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1749 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1750 
1751    Level: intermediate
1752 
1753   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1755    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1756    the given rank.
1757 
1758 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1759 M*/
1760 
/*
   MatCreate_ScaLAPACK - constructor for the MATSCALAPACK matrix type: installs the
   function table and stash callbacks, obtains (or creates) the BLACS process grid
   shared by all MATSCALAPACK matrices on this communicator, and processes the
   block-size options. The matrix is not usable until MatSetUp() is called.
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscBool           flg, flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm            icomm;
  PetscBLASInt        nprow, npcol, myrow, mycol;
  PetscInt            optv1, k = 2, array[2] = {0, 0};
  PetscMPIInt         size;

  PetscFunctionBegin;
  A->ops[0]     = MatOps_Values;
  A->insertmode = NOT_SET_VALUES;

  /* stash for entries set off-process via MatSetValues(); ScatterBegin is ScaLAPACK-specific
     because ownership follows the 2D block-cyclic layout, not contiguous rows */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNew(&a));
  A->data = (void *)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* first MATSCALAPACK ever created: register the keyval (freed at PetscFinalize) and the citation */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
  }
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (!flg) {
    /* no grid cached on this communicator yet: create one and attach it */
    PetscCall(PetscNew(&grid));

    PetscCallMPI(MPI_Comm_size(icomm, &size));
    /* default grid height: floor(sqrt(size)), i.e. as square a grid as possible */
    grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
    if (flg1) {
      PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size / grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
    PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); /* "R" = row-major process ordering */
    Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

  } else grid->grid_refct++; /* reuse the grid already cached on this communicator */
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
  if (flg) {
    a->mb = array[0];
    a->nb = (k > 1) ? array[1] : a->mb; /* a single value means square blocks */
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1846 
1847 /*@C
1848   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1849   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1850 
1851   Collective
1852 
1853   Input Parameters:
1854 + comm - MPI communicator
1855 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1856 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1857 . M    - number of global rows
1858 . N    - number of global columns
1859 . rsrc - coordinate of process that owns the first row of the distributed matrix
1860 - csrc - coordinate of process that owns the first column of the distributed matrix
1861 
1862   Output Parameter:
1863 . A - the matrix
1864 
1865   Options Database Key:
1866 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1867 
1868   Level: intermediate
1869 
1870   Notes:
1871   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1872 
1873   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1874   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1875   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1876 
  Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1878   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1879   significance and are thus ignored. The block sizes refer to the values
1880   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1881 
1882 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1883 @*/
1884 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1885 {
1886   Mat_ScaLAPACK *a;
1887   PetscInt       m, n;
1888 
1889   PetscFunctionBegin;
1890   PetscCall(MatCreate(comm, A));
1891   PetscCall(MatSetType(*A, MATSCALAPACK));
1892   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1893   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1894   m = PETSC_DECIDE;
1895   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1896   n = PETSC_DECIDE;
1897   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1898   PetscCall(MatSetSizes(*A, m, n, M, N));
1899   a = (Mat_ScaLAPACK *)(*A)->data;
1900   PetscCall(PetscBLASIntCast(M, &a->M));
1901   PetscCall(PetscBLASIntCast(N, &a->N));
1902   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1903   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1904   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1905   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1906   PetscCall(MatSetUp(*A));
1907   PetscFunctionReturn(PETSC_SUCCESS);
1908 }
1909