xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/*
   Frees the MPI attribute keyval used to attach the ScaLAPACK process grid
   (Mat_ScaLAPACK_Grid) to a communicator; MPI_Comm_free_keyval() resets
   Petsc_ScaLAPACK_keyval to MPI_KEYVAL_INVALID.  Presumably registered to run
   during PetscFinalize() — registration site is not visible here.
*/
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) {
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}
28 
29 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) {
30   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
31   PetscBool         iascii;
32   PetscViewerFormat format;
33   Mat               Adense;
34 
35   PetscFunctionBegin;
36   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
37   if (iascii) {
38     PetscCall(PetscViewerGetFormat(viewer, &format));
39     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
40       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
41       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
42       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
44       PetscFunctionReturn(0);
45     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
46       PetscFunctionReturn(0);
47     }
48   }
49   /* convert to dense format and call MatView() */
50   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
51   PetscCall(MatView(Adense, viewer));
52   PetscCall(MatDestroy(&Adense));
53   PetscFunctionReturn(0);
54 }
55 
56 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) {
57   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
58   PetscLogDouble isend[2], irecv[2];
59 
60   PetscFunctionBegin;
61   info->block_size = 1.0;
62 
63   isend[0] = a->lld * a->locc;  /* locally allocated */
64   isend[1] = a->locr * a->locc; /* used submatrix */
65   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
66     info->nz_allocated = isend[0];
67     info->nz_used      = isend[1];
68   } else if (flag == MAT_GLOBAL_MAX) {
69     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
70     info->nz_allocated = irecv[0];
71     info->nz_used      = irecv[1];
72   } else if (flag == MAT_GLOBAL_SUM) {
73     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
74     info->nz_allocated = irecv[0];
75     info->nz_used      = irecv[1];
76   }
77 
78   info->nz_unneeded       = 0;
79   info->assemblies        = A->num_ass;
80   info->mallocs           = 0;
81   info->memory            = 0; /* REVIEW ME */
82   info->fill_ratio_given  = 0;
83   info->fill_ratio_needed = 0;
84   info->factor_mallocs    = 0;
85   PetscFunctionReturn(0);
86 }
87 
88 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) {
89   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
90 
91   PetscFunctionBegin;
92   switch (op) {
93   case MAT_NEW_NONZERO_LOCATIONS:
94   case MAT_NEW_NONZERO_LOCATION_ERR:
95   case MAT_NEW_NONZERO_ALLOCATION_ERR:
96   case MAT_SYMMETRIC:
97   case MAT_SORTED_FULL:
98   case MAT_HERMITIAN: break;
99   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
100   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
101   }
102   PetscFunctionReturn(0);
103 }
104 
/*
   Inserts or adds a logically dense block of values into the matrix.
   Each (row,col) pair is mapped with pxinfog2l to local indices plus the
   owning process coordinates; locally owned entries are written straight into
   the local array, off-process entries are queued in the matrix stash and
   communicated later by MatAssemblyBegin/End.  Negative row/column indices
   are skipped (standard PETSc convention).  The roworiented flag (set via
   MatSetOption) selects how the vals array is laid out.
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* ScaLAPACK global indices are 1-based */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      /* map global (gridx,gcidx) -> local (lridx,lcidx) and owner (rsrc,csrc) */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        /* entry lives on this process: write into the column-major local buffer */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break;
          }
        }
      } else {
        /* off-process: stash the single entry; assembly will deliver it */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
141 
/*
   Shared kernel behind MatMult/MatMultTranspose/MatMultAdd/MatMultTransposeAdd:
   computes y = op(A)*x + beta*y, where op(A) is A (transpose == PETSC_FALSE)
   or A^T (alpha is fixed at 1).  The PETSc vectors arrive in a 1d block
   distribution; they are redistributed with pxgemr2d into copies matching the
   2d block-cyclic layout of A, PBLASgemv runs on those copies, and the result
   is redistributed back.  When beta == 0 the incoming contents of y are never
   read, so the y-redistribution into the 2d copy is skipped.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when y contributes) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine: y2d = A^T * x2d + beta * y2d */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when y contributes) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine: y2d = A * x2d + beta * y2d */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(0);
}
227 
228 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) {
229   const PetscScalar *xarray;
230   PetscScalar       *yarray;
231 
232   PetscFunctionBegin;
233   PetscCall(VecGetArrayRead(x, &xarray));
234   PetscCall(VecGetArray(y, &yarray));
235   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
236   PetscCall(VecRestoreArrayRead(x, &xarray));
237   PetscCall(VecRestoreArray(y, &yarray));
238   PetscFunctionReturn(0);
239 }
240 
241 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) {
242   const PetscScalar *xarray;
243   PetscScalar       *yarray;
244 
245   PetscFunctionBegin;
246   PetscCall(VecGetArrayRead(x, &xarray));
247   PetscCall(VecGetArray(y, &yarray));
248   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
249   PetscCall(VecRestoreArrayRead(x, &xarray));
250   PetscCall(VecRestoreArray(y, &yarray));
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
255   const PetscScalar *xarray;
256   PetscScalar       *zarray;
257 
258   PetscFunctionBegin;
259   if (y != z) PetscCall(VecCopy(y, z));
260   PetscCall(VecGetArrayRead(x, &xarray));
261   PetscCall(VecGetArray(z, &zarray));
262   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
263   PetscCall(VecRestoreArrayRead(x, &xarray));
264   PetscCall(VecRestoreArray(z, &zarray));
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
269   const PetscScalar *xarray;
270   PetscScalar       *zarray;
271 
272   PetscFunctionBegin;
273   if (y != z) PetscCall(VecCopy(y, z));
274   PetscCall(VecGetArrayRead(x, &xarray));
275   PetscCall(VecGetArray(z, &zarray));
276   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
277   PetscCall(VecRestoreArrayRead(x, &xarray));
278   PetscCall(VecRestoreArray(z, &zarray));
279   PetscFunctionReturn(0);
280 }
281 
282 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
283   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
284   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
285   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
286   PetscScalar    sone = 1.0, zero = 0.0;
287   PetscBLASInt   one = 1;
288 
289   PetscFunctionBegin;
290   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
291   C->assembled = PETSC_TRUE;
292   PetscFunctionReturn(0);
293 }
294 
/*
   Symbolic phase of C = A*B: sizes C as (A rows) x (B cols), sets the
   MATSCALAPACK type, allocates storage, and registers the numeric routine
   used by the MatProduct machinery.  The fill argument is irrelevant for a
   dense product and is ignored.
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
303 
304 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
305   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
306   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
307   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
308   PetscScalar    sone = 1.0, zero = 0.0;
309   PetscBLASInt   one = 1;
310 
311   PetscFunctionBegin;
312   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
313   C->assembled = PETSC_TRUE;
314   PetscFunctionReturn(0);
315 }
316 
317 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
318   PetscFunctionBegin;
319   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
320   PetscCall(MatSetType(C, MATSCALAPACK));
321   PetscCall(MatSetUp(C));
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
/* Wires the AB (C = A*B) product: symbolic routine plus the generic driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
332 
/* Wires the ABt (C = A*B^T) product: symbolic routine plus the generic driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) {
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
339 
340 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) {
341   Mat_Product *product = C->product;
342 
343   PetscFunctionBegin;
344   switch (product->type) {
345   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break;
346   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break;
347   default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
348   }
349   PetscFunctionReturn(0);
350 }
351 /* --------------------------------------- */
352 
/*
   Extracts the diagonal of A into vector D.  The diagonal is first gathered
   with p*elget/p*elset into a temporary distributed across the 2d grid, then
   redistributed with pxgemr2d into D's 1d layout.  Two variants: when the
   matrix has at least as many columns as rows (M <= N) the diagonal is
   treated as a column vector distributed over process rows; otherwise it is
   treated as a row vector distributed over process columns.  D is assumed to
   have A's smaller dimension — not checked here.
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector (zeroed: only owned diagonal entries get written) */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal: elget broadcasts A(j,j) along the process row,
       elset stores it into the local piece of the column vector */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector (zeroed: only owned diagonal entries get written) */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal: elget broadcasts A(j,j) along the process column,
       elset stores it into the local piece of the row vector */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
423 
/*
   Computes A = diag(L) * A * diag(R).  Either vector may be NULL to skip
   that side.  Each vector is redistributed from its 1d PETSc layout onto the
   matrix's 2d grid with pxgemr2d, broadcast across the orthogonal grid
   dimension with BLACS so every process holds the factors for its local
   rows/columns, and then applied entrywise to the local submatrix.
   NOTE(review): Cdgebs2d/Cdgebr2d are the double-precision-named BLACS
   broadcast calls — confirm they are remapped appropriately for
   single-precision/complex PetscScalar builds.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    /* right scaling: multiply column j of the local block by R[j] */
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    /* left scaling: multiply row i of the local block by L[i] */
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
497 
/*
   A dense (ScaLAPACK) matrix stores every diagonal entry explicitly, so the
   diagonal is never "missing"; the optional index output d is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
503 
504 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) {
505   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
506   PetscBLASInt   n, one = 1;
507 
508   PetscFunctionBegin;
509   n = x->lld * x->locc;
510   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
511   PetscFunctionReturn(0);
512 }
513 
514 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) {
515   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
516   PetscBLASInt   i, n;
517   PetscScalar    v;
518 
519   PetscFunctionBegin;
520   n = PetscMin(x->M, x->N);
521   for (i = 1; i <= n; i++) {
522     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
523     v += alpha;
524     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
525   }
526   PetscFunctionReturn(0);
527 }
528 
529 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
530   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
531   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
532   PetscBLASInt   one  = 1;
533   PetscScalar    beta = 1.0;
534 
535   PetscFunctionBegin;
536   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
537   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
538   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
539   PetscFunctionReturn(0);
540 }
541 
/*
   Copies A's local buffer into B (str is ignored for dense matrices).
   NOTE(review): copies a->lld * a->locc entries straight into b->loc, so A
   and B must have identical distributions (same lld/locc); no check is done
   here — confirm callers guarantee it.
*/
static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}
551 
552 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) {
553   Mat            Bs;
554   MPI_Comm       comm;
555   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
556 
557   PetscFunctionBegin;
558   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
559   PetscCall(MatCreate(comm, &Bs));
560   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
561   PetscCall(MatSetType(Bs, MATSCALAPACK));
562   b       = (Mat_ScaLAPACK *)Bs->data;
563   b->M    = a->M;
564   b->N    = a->N;
565   b->mb   = a->mb;
566   b->nb   = a->nb;
567   b->rsrc = a->rsrc;
568   b->csrc = a->csrc;
569   PetscCall(MatSetUp(Bs));
570   *B = Bs;
571   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
572   Bs->assembled = PETSC_TRUE;
573   PetscFunctionReturn(0);
574 }
575 
/*
   B = A^T.  Creates B with swapped block sizes/dimensions/source coordinates
   and uses the parallel BLAS transpose routine.  In complex builds the
   conjugation introduced by PBLAStran_ (presumably mapped to the
   conjugate-transpose p*tranc — confirm in petscscalapack.h) is undone with
   an explicit PetscConj pass so that the result is a plain transpose.
   NOTE(review): MAT_REUSE_MATRIX passes the nonzero-state check but then
   falls into the "Only MAT_INITIAL_MATRIX supported" error below — confirm
   this is the intended behavior.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's layout with rows/columns swapped */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
601 
602 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) {
603   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
604   PetscInt       i, j;
605 
606   PetscFunctionBegin;
607   for (i = 0; i < a->locr; i++)
608     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
609   PetscFunctionReturn(0);
610 }
611 
/*
   B = A^H.  Same structure as MatTranspose_ScaLAPACK but without the
   conjugation-undo pass: PBLAStran_ presumably maps to the
   conjugate-transpose p*tranc in complex builds (see petscscalapack.h —
   confirm), which yields the Hermitian transpose directly.  Only
   MAT_INITIAL_MATRIX is supported.
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's layout with rows/columns swapped */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
628 
/*
   Solves A*X = B with a previously factored A (LU via p*getrs, Cholesky via
   p*potrs with the lower factor).  The right-hand side is copied into X,
   redistributed from its 1d layout to a column matching A's 2d block-cyclic
   layout, solved in place, and redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X)); /* the solve happens in place on X's data */
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine appropriate for the stored factorization */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(0);
}
678 
679 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) {
680   PetscFunctionBegin;
681   PetscCall(MatSolve_ScaLAPACK(A, B, X));
682   PetscCall(VecAXPY(X, 1, Y));
683   PetscFunctionReturn(0);
684 }
685 
/* Solves A*X = B with multiple right-hand sides, where A holds a previously
   computed LU or Cholesky factorization. Both B and X must be of type
   MATSCALAPACK and share the same 2d block-cyclic distribution */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  /* copy the local block of B into X; the triangular solves below overwrite
     X in place (assumes x and b have the same lld/locc, which the
     distribution check above should guarantee — NOTE(review): confirm) */
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
713 
/* In-place LU factorization via ScaLAPACK p*getrf. The row/col orderings are
   ignored because ScaLAPACK performs its own partial pivoting */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) {
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* pivot array of length locr+mb (LOCr(M)+MB, as p*getrf requires); allocated
     lazily and kept for reuse in subsequent factorizations */
  if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); }
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}
729 
730 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
731   PetscFunctionBegin;
732   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
733   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
734   PetscFunctionReturn(0);
735 }
736 
/* Symbolic LU factorization: intentionally a no-op, since dense factorization
   needs no symbolic phase */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
742 
/* In-place Cholesky factorization via ScaLAPACK p*potrf, using the lower
   triangular part ("L"). The permutation perm is ignored */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) {
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}
757 
758 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
759   PetscFunctionBegin;
760   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
761   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
762   PetscFunctionReturn(0);
763 }
764 
/* Symbolic Cholesky factorization: intentionally a no-op, since dense
   factorization needs no symbolic phase */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
770 
/* Reports the solver package that produced this factored matrix */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) {
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
776 
/* Creates the factor matrix F for A, mirroring A's block sizes, global sizes
   and source process, so that the numeric factorization can copy A into F */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) {
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  /* the symbolic phase is a no-op, so mark it trivial */
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}
793 
/* Registers the ScaLAPACK solver package as an LU and Cholesky factorization
   provider for matrices of type MATSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) {
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
800 
/* Computes the 1-, Frobenius- or infinity-norm of A via ScaLAPACK p*lange */
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) {
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    /* p*lange requires a workspace for the 1- and infinity-norms; the max of
       both local dimensions is used so the same size fits either case */
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy; /* placeholder; presumably not referenced for "F" — TODO confirm */
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  if (lwork) PetscCall(PetscMalloc1(lwork, &work));
  *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}
828 
829 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) {
830   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
831 
832   PetscFunctionBegin;
833   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
834   PetscFunctionReturn(0);
835 }
836 
837 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) {
838   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
839   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
840 
841   PetscFunctionBegin;
842   if (rows) {
843     n     = a->locr;
844     nb    = a->mb;
845     isrc  = a->rsrc;
846     nproc = a->grid->nprow;
847     iproc = a->grid->myrow;
848     PetscCall(PetscMalloc1(n, &idx));
849     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
850     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
851   }
852   if (cols) {
853     n     = a->locc;
854     nb    = a->nb;
855     isrc  = a->csrc;
856     nproc = a->grid->npcol;
857     iproc = a->grid->mycol;
858     PetscCall(PetscMalloc1(n, &idx));
859     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
860     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
861   }
862   PetscFunctionReturn(0);
863 }
864 
/* Converts a MATSCALAPACK matrix (2d block-cyclic layout) into MATDENSE with
   a 1d row distribution, using p*gemr2d for the redistribution. When B is
   reused with local sizes different from the layout produced here, an
   auxiliary dense matrix is built first and its rows transferred to B with
   MatSetValues() */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
958 
/* Checks whether the layout is compatible with the distribution used here:
   all processes own the same number of rows/columns, except possibly the last
   process, which may own fewer (but not more) */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) {
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0]; /* local size of the first process */
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    /* accept if all sizes match, or only the last one differs and is smaller */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}
976 
/* Converts a MATDENSE matrix to MATSCALAPACK. If the dense matrix already has
   a ScaLAPACK-compatible layout, the values are redistributed with p*gemr2d;
   otherwise they are inserted one by one with MatSetValues() */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  /* both row and column layouts must be compatible to use p*gemr2d */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    /* all values are already local; skip the stash machinery at assembly */
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    /* the dense array is column-major, so insert in column-oriented mode */
    b->roworiented = PETSC_FALSE;
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1040 
1041 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1042   Mat                mat_scal;
1043   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1044   const PetscInt    *cols;
1045   const PetscScalar *vals;
1046 
1047   PetscFunctionBegin;
1048   if (reuse == MAT_REUSE_MATRIX) {
1049     mat_scal = *newmat;
1050     PetscCall(MatZeroEntries(mat_scal));
1051   } else {
1052     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1053     m = PETSC_DECIDE;
1054     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1055     n = PETSC_DECIDE;
1056     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1057     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1058     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1059     PetscCall(MatSetUp(mat_scal));
1060   }
1061   for (row = rstart; row < rend; row++) {
1062     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1063     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1064     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1065   }
1066   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1067   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1068 
1069   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1070   else *newmat = mat_scal;
1071   PetscFunctionReturn(0);
1072 }
1073 
/* Converts an SBAIJ matrix (only the upper triangle is stored) to
   MATSCALAPACK, explicitly mirroring the strictly upper triangular entries
   into the lower triangle (conjugated if the matrix is Hermitian) */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    /* ADD_VALUES is used throughout so the two mirrored inserts accumulate */
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* do not duplicate the diagonal */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1114 
1115 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) {
1116   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1117   PetscInt       sz = 0;
1118 
1119   PetscFunctionBegin;
1120   PetscCall(PetscLayoutSetUp(A->rmap));
1121   PetscCall(PetscLayoutSetUp(A->cmap));
1122   if (!a->lld) a->lld = a->locr;
1123 
1124   PetscCall(PetscFree(a->loc));
1125   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1126   PetscCall(PetscCalloc1(sz, &a->loc));
1127 
1128   A->preallocated = PETSC_TRUE;
1129   PetscFunctionReturn(0);
1130 }
1131 
/* Destroys the matrix: frees the local array and pivots, and releases one
   reference to the shared process grid attached to the communicator */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* the grid lives as an attribute on PETSc's inner communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  /* the grid is reference-counted; on the last reference, exit the BLACS
     contexts and remove the attribute from the communicator */
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1159 
/* Sets up the matrix: validates the layouts, computes local sizes with
   numroc, allocates the local array, and initializes the ScaLAPACK array
   descriptor */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even for empty local blocks */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(0);
}
1190 
/* Begins assembly: starts scattering stashed off-process entries to their
   owners (no-op when the matrix was flagged as having no off-process entries) */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) {
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}
1201 
/* Completes assembly: receives stashed off-process entries and inserts or
   adds them into this process's block of the 2d block-cyclic array */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  /* drain all incoming stash messages */
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      /* ScaLAPACK global indices are 1-based */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      /* map global (row,col) to local indices and the owning grid process */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      /* store into the column-major local array (convert back to 0-based) */
      switch (A->insertmode) {
      case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break;
      case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break;
      default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1229 
1230 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) {
1231   Mat      Adense, As;
1232   MPI_Comm comm;
1233 
1234   PetscFunctionBegin;
1235   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1236   PetscCall(MatCreate(comm, &Adense));
1237   PetscCall(MatSetType(Adense, MATDENSE));
1238   PetscCall(MatLoad(Adense, viewer));
1239   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1240   PetscCall(MatDestroy(&Adense));
1241   PetscCall(MatHeaderReplace(newMat, &As));
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 /* -------------------------------------------------------------------*/
/* Table of matrix operations implemented by MATSCALAPACK; the numbered
   comments give the slot index within struct _MatOps, and 0 marks an
   unimplemented operation */
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       0,
                                       0,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       0,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       0,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ 0,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_ScaLAPACK_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_ScaLAPACK,
                                       /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_ScaLAPACK,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       0,
                                       /*99*/ MatProductSetFromOptions_ScaLAPACK,
                                       0,
                                       0,
                                       MatConjugate_ScaLAPACK,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_ScaLAPACK,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0};
1397 
/*
  MatStashScatterBegin_ScaLAPACK - Starts the communication that ships stashed
  (off-process) matrix entries to their owning processes.

  Modeled after MatStashScatterBegin_Ref(), except that the destination of each
  stashed (row,col,value) triple is computed from the 2D block-cyclic layout via
  SCALAPACKinfog2l_()/Cblacs_pnum(), not from contiguous row ownership ranges;
  the 'owners' argument is therefore unused here.

  Two nonblocking sends per destination are posted: one with the (row,col) index
  pairs (tag1) and one with the values (tag2). The matching receives are posted
  first, then interleaved into stash->recv_waits as required by
  MatStashScatterGetMesg_Private().
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) {
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* convert the stashed global (row,col) to 1-based indices (ScaLAPACK convention)
         and ask the BLACS grid which process owns that entry */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and their from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* second pass over the stash: pack values and (row,col) pairs into the
     per-destination segments computed above; startv/starti are consumed as
     running cursors during the packing */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows go in the first half of the segment, columns in the second half */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv was consumed by the packing loop; rebuild the segment offsets for the sends */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* hand ownership of all buffers/requests to the stash; they are released
     by the ScatterEnd/ScatterDestroy path */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1549 
1550 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) {
1551   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1552 
1553   PetscFunctionBegin;
1554   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1555   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1556   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1557   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1558   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 /*@
1563    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1564    the `MATSCALAPACK` matrix
1565 
1566    Logically Collective on A
1567 
1568    Input Parameters:
1569 +  A  - a `MATSCALAPACK` matrix
1570 .  mb - the row block size
1571 -  nb - the column block size
1572 
1573    Level: intermediate
1574 
1575 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1576 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* block sizes must be the same on every rank of A's communicator */
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  /* PetscTryMethod: a no-op when A is not a MATSCALAPACK (method not composed) */
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(0);
}
1585 
1586 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) {
1587   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1588 
1589   PetscFunctionBegin;
1590   if (mb) *mb = a->mb;
1591   if (nb) *nb = a->nb;
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 /*@
1596    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1597    the `MATSCALAPACK` matrix
1598 
1599    Not collective
1600 
1601    Input Parameter:
1602 .  A  - a `MATSCALAPACK` matrix
1603 
1604    Output Parameters:
1605 +  mb - the row block size
1606 -  nb - the column block size
1607 
1608    Level: intermediate
1609 
1610 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1611 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if A is not a MATSCALAPACK (method not composed) */
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(0);
}
1618 
1619 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1620 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1621 
1622 /*MC
1623    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1624 
1625    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1626 
1627    Options Database Keys:
1628 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
1629 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1630 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1631 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1632 
1633   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1635    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1636    the given rank.
1637 
1638    Level: beginner
1639 
1640 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1641 M*/
1642 
/*
  MatCreate_ScaLAPACK - Constructor for MATSCALAPACK: installs the function table,
  the stash hooks, and attaches (or reuses) the BLACS process grid cached on the
  communicator via the Petsc_ScaLAPACK_keyval MPI attribute.
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a;
  PetscBool           flg, flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm            icomm;
  PetscBLASInt        nprow, npcol, myrow, mycol;
  PetscInt            optv1, k = 2, array[2] = {0, 0};
  PetscMPIInt         size;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  /* use the reference stash implementation except for ScatterBegin, which must
     route entries according to the 2D block-cyclic layout */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNew(&a));
  A->data = (void *)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* first MATSCALAPACK ever created: register the keyval (freed at PetscFinalize) and the citation */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
  }
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (!flg) { /* no grid cached on this communicator yet: build one */
    PetscCall(PetscNew(&grid));

    PetscCallMPI(MPI_Comm_size(icomm, &size));
    /* default grid height: near-square grid; +0.001 guards against sqrt rounding just below an exact root */
    grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
    if (flg1) {
      PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size / grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
    PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
    /* initialize the main nprow x npcol BLACS context (row-major process ordering),
       then read back the actual grid shape and this process's coordinates */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
    Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow    = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
    /* cache the grid on the communicator so later Mats on it reuse the same contexts */
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

  } else grid->grid_refct++;
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
  if (flg) {
    /* one value sets both block sizes; two values set rows,columns */
    a->mb = array[0];
    a->nb = (k > 1) ? array[1] : a->mb;
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
  PetscFunctionReturn(0);
}
1727 
1728 /*@C
1729    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1730    (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1731 
1732    Collective
1733 
1734    Input Parameters:
1735 +  comm - MPI communicator
1736 .  mb   - row block size (or `PETSC_DECIDE` to have it set)
1737 .  nb   - column block size (or `PETSC_DECIDE` to have it set)
1738 .  M    - number of global rows
1739 .  N    - number of global columns
1740 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1741 -  csrc - coordinate of process that owns the first column of the distributed matrix
1742 
1743    Output Parameter:
1744 .  A - the matrix
1745 
1746    Options Database Key:
1747 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1748 
1749    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1750    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1751    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1752 
1753    Note:
1754    If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
1755    is chosen.
1756 
1757    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1759    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1760    significance and are thus ignored. The block sizes refer to the values
1761    used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1762 
1763    Level: intermediate
1764 
1765 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1766 @*/
1767 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) {
1768   Mat_ScaLAPACK *a;
1769   PetscInt       m, n;
1770 
1771   PetscFunctionBegin;
1772   PetscCall(MatCreate(comm, A));
1773   PetscCall(MatSetType(*A, MATSCALAPACK));
1774   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1775   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1776   m = PETSC_DECIDE;
1777   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1778   n = PETSC_DECIDE;
1779   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1780   PetscCall(MatSetSizes(*A, m, n, M, N));
1781   a = (Mat_ScaLAPACK *)(*A)->data;
1782   PetscCall(PetscBLASIntCast(M, &a->M));
1783   PetscCall(PetscBLASIntCast(N, &a->N));
1784   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1785   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1786   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1787   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1788   PetscCall(MatSetUp(*A));
1789   PetscFunctionReturn(0);
1790 }
1791