xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 5e4d33a35366e61dfee3610135fe81141e576059)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
327e75052SPierre Jolivet const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet                                        "       PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet                                        "       ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet                                        "       YEAR = 1997\n"
1127e75052SPierre Jolivet                                        "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite       = PETSC_FALSE;
1327e75052SPierre Jolivet 
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman 
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman 
22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23d71ae5a4SJacob Faibussowitsch {
24f7ec113fSDamian Marek   PetscFunctionBegin;
259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
269566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28f7ec113fSDamian Marek }
29f7ec113fSDamian Marek 
30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31d71ae5a4SJacob Faibussowitsch {
32d24d4204SJose E. Roman   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33d24d4204SJose E. Roman   PetscBool         iascii;
34d24d4204SJose E. Roman   PetscViewerFormat format;
35d24d4204SJose E. Roman   Mat               Adense;
36d24d4204SJose E. Roman 
37d24d4204SJose E. Roman   PetscFunctionBegin;
389566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39d24d4204SJose E. Roman   if (iascii) {
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
41d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
429566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
439566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
449566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
459566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
463ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
47d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
483ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
49d24d4204SJose E. Roman     }
50d24d4204SJose E. Roman   }
51d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
529566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
539566063dSJacob Faibussowitsch   PetscCall(MatView(Adense, viewer));
549566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56d24d4204SJose E. Roman }
57d24d4204SJose E. Roman 
58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59d71ae5a4SJacob Faibussowitsch {
60d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61d24d4204SJose E. Roman   PetscLogDouble isend[2], irecv[2];
62d24d4204SJose E. Roman 
63d24d4204SJose E. Roman   PetscFunctionBegin;
64d24d4204SJose E. Roman   info->block_size = 1.0;
65d24d4204SJose E. Roman 
66d24d4204SJose E. Roman   isend[0] = a->lld * a->locc;  /* locally allocated */
67d24d4204SJose E. Roman   isend[1] = a->locr * a->locc; /* used submatrix */
68d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69d24d4204SJose E. Roman     info->nz_allocated = isend[0];
70d24d4204SJose E. Roman     info->nz_used      = isend[1];
71d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
7257168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
74d24d4204SJose E. Roman     info->nz_used      = irecv[1];
75d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
7657168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
78d24d4204SJose E. Roman     info->nz_used      = irecv[1];
79d24d4204SJose E. Roman   }
80d24d4204SJose E. Roman 
81d24d4204SJose E. Roman   info->nz_unneeded       = 0;
82d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
83d24d4204SJose E. Roman   info->mallocs           = 0;
844dfa11a4SJacob Faibussowitsch   info->memory            = 0; /* REVIEW ME */
85d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
86d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
87d24d4204SJose E. Roman   info->factor_mallocs    = 0;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89d24d4204SJose E. Roman }
90d24d4204SJose E. Roman 
9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92d71ae5a4SJacob Faibussowitsch {
93b12397e7SPierre Jolivet   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94b12397e7SPierre Jolivet 
95d24d4204SJose E. Roman   PetscFunctionBegin;
96d24d4204SJose E. Roman   switch (op) {
97d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATIONS:
98d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATION_ERR:
99d24d4204SJose E. Roman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100d24d4204SJose E. Roman   case MAT_SYMMETRIC:
101d24d4204SJose E. Roman   case MAT_SORTED_FULL:
102d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
103d71ae5a4SJacob Faibussowitsch     break;
104d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
105d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
106d71ae5a4SJacob Faibussowitsch     break;
107d71ae5a4SJacob Faibussowitsch   default:
108d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109d24d4204SJose E. Roman   }
1103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
111d24d4204SJose E. Roman }
112d24d4204SJose E. Roman 
113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114d71ae5a4SJacob Faibussowitsch {
115d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116d24d4204SJose E. Roman   PetscInt       i, j;
117d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118b12397e7SPierre Jolivet   PetscBool      roworiented = a->roworiented;
119d24d4204SJose E. Roman 
120d24d4204SJose E. Roman   PetscFunctionBegin;
121b12397e7SPierre Jolivet   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122d24d4204SJose E. Roman   for (i = 0; i < nr; i++) {
123d24d4204SJose E. Roman     if (rows[i] < 0) continue;
1249566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125d24d4204SJose E. Roman     for (j = 0; j < nc; j++) {
126d24d4204SJose E. Roman       if (cols[j] < 0) continue;
1279566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129d24d4204SJose E. Roman       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130b12397e7SPierre Jolivet         if (roworiented) {
131d24d4204SJose E. Roman           switch (imode) {
132d71ae5a4SJacob Faibussowitsch           case INSERT_VALUES:
133d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134d71ae5a4SJacob Faibussowitsch             break;
135d71ae5a4SJacob Faibussowitsch           default:
136d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137d71ae5a4SJacob Faibussowitsch             break;
138b12397e7SPierre Jolivet           }
139b12397e7SPierre Jolivet         } else {
140b12397e7SPierre Jolivet           switch (imode) {
141d71ae5a4SJacob Faibussowitsch           case INSERT_VALUES:
142d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143d71ae5a4SJacob Faibussowitsch             break;
144d71ae5a4SJacob Faibussowitsch           default:
145d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146d71ae5a4SJacob Faibussowitsch             break;
147b12397e7SPierre Jolivet           }
148d24d4204SJose E. Roman         }
149d24d4204SJose E. Roman       } else {
15028b400f6SJacob Faibussowitsch         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
152b12397e7SPierre Jolivet         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153d24d4204SJose E. Roman       }
154d24d4204SJose E. Roman     }
155d24d4204SJose E. Roman   }
1563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
157d24d4204SJose E. Roman }
158d24d4204SJose E. Roman 
159*5e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160d71ae5a4SJacob Faibussowitsch {
161d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162d24d4204SJose E. Roman   PetscScalar    *x2d, *y2d, alpha = 1.0;
163d24d4204SJose E. Roman   const PetscInt *ranges;
164d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165d24d4204SJose E. Roman 
166d24d4204SJose E. Roman   PetscFunctionBegin;
167d24d4204SJose E. Roman   if (transpose) {
168d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1699566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1709566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171d24d4204SJose E. Roman     xlld = PetscMax(1, A->rmap->n);
172792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1749566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
1759566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176d24d4204SJose E. Roman     ylld = 1;
177792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
179d24d4204SJose E. Roman 
180d24d4204SJose E. Roman     /* allocate 2d vectors */
181d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184d24d4204SJose E. Roman     xlld = PetscMax(1, lszx);
185d24d4204SJose E. Roman 
186d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
189792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
191d24d4204SJose E. Roman 
192d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
193792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194d24d4204SJose E. Roman 
195d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
196792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197d24d4204SJose E. Roman 
198d24d4204SJose E. Roman     /* call PBLAS subroutine */
199*5e4d33a3SBlanca Mellado Pinto     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200*5e4d33a3SBlanca Mellado Pinto     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201d24d4204SJose E. Roman 
202d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
203792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204d24d4204SJose E. Roman 
205d24d4204SJose E. Roman   } else { /* non-transpose */
206d24d4204SJose E. Roman 
207d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
2089566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
2099566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210d24d4204SJose E. Roman     xlld = 1;
211792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
2139566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
2149566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215d24d4204SJose E. Roman     ylld = PetscMax(1, A->rmap->n);
216792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
218d24d4204SJose E. Roman 
219d24d4204SJose E. Roman     /* allocate 2d vectors */
220d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
2229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223d24d4204SJose E. Roman     ylld = PetscMax(1, lszy);
224d24d4204SJose E. Roman 
225d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
228792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
230d24d4204SJose E. Roman 
231d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
232792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233d24d4204SJose E. Roman 
234d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
235792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236d24d4204SJose E. Roman 
237d24d4204SJose E. Roman     /* call PBLAS subroutine */
238792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239d24d4204SJose E. Roman 
240d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
241792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242d24d4204SJose E. Roman   }
2439566063dSJacob Faibussowitsch   PetscCall(PetscFree2(x2d, y2d));
2443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
245d24d4204SJose E. Roman }
246d24d4204SJose E. Roman 
247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248d71ae5a4SJacob Faibussowitsch {
249d24d4204SJose E. Roman   const PetscScalar *xarray;
250d24d4204SJose E. Roman   PetscScalar       *yarray;
251d24d4204SJose E. Roman 
252d24d4204SJose E. Roman   PetscFunctionBegin;
2539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
255*5e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
2569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
2583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259d24d4204SJose E. Roman }
260d24d4204SJose E. Roman 
261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262d71ae5a4SJacob Faibussowitsch {
263d24d4204SJose E. Roman   const PetscScalar *xarray;
264d24d4204SJose E. Roman   PetscScalar       *yarray;
265d24d4204SJose E. Roman 
266d24d4204SJose E. Roman   PetscFunctionBegin;
2679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2689566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
269*5e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
2709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
2723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
273d24d4204SJose E. Roman }
274d24d4204SJose E. Roman 
275*5e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276*5e4d33a3SBlanca Mellado Pinto {
277*5e4d33a3SBlanca Mellado Pinto   const PetscScalar *xarray;
278*5e4d33a3SBlanca Mellado Pinto   PetscScalar       *yarray;
279*5e4d33a3SBlanca Mellado Pinto 
280*5e4d33a3SBlanca Mellado Pinto   PetscFunctionBegin;
281*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArrayRead(x, &xarray));
282*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArrayWrite(y, &yarray));
283*5e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArrayRead(x, &xarray));
285*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArrayWrite(y, &yarray));
286*5e4d33a3SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
287*5e4d33a3SBlanca Mellado Pinto }
288*5e4d33a3SBlanca Mellado Pinto 
289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290d71ae5a4SJacob Faibussowitsch {
291d24d4204SJose E. Roman   const PetscScalar *xarray;
292d24d4204SJose E. Roman   PetscScalar       *zarray;
293d24d4204SJose E. Roman 
294d24d4204SJose E. Roman   PetscFunctionBegin;
2959566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
2969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
298*5e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
2999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
3009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
3013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
302d24d4204SJose E. Roman }
303d24d4204SJose E. Roman 
304d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305d71ae5a4SJacob Faibussowitsch {
306d24d4204SJose E. Roman   const PetscScalar *xarray;
307d24d4204SJose E. Roman   PetscScalar       *zarray;
308d24d4204SJose E. Roman 
309d24d4204SJose E. Roman   PetscFunctionBegin;
3109566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
3119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
3129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
313*5e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArrayRead(x, &xarray));
315*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArray(z, &zarray));
316*5e4d33a3SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
317*5e4d33a3SBlanca Mellado Pinto }
318*5e4d33a3SBlanca Mellado Pinto 
319*5e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320*5e4d33a3SBlanca Mellado Pinto {
321*5e4d33a3SBlanca Mellado Pinto   const PetscScalar *xarray;
322*5e4d33a3SBlanca Mellado Pinto   PetscScalar       *zarray;
323*5e4d33a3SBlanca Mellado Pinto 
324*5e4d33a3SBlanca Mellado Pinto   PetscFunctionBegin;
325*5e4d33a3SBlanca Mellado Pinto   if (y != z) PetscCall(VecCopy(y, z));
326*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArrayRead(x, &xarray));
327*5e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArray(z, &zarray));
328*5e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
3299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
3309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
3313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
332d24d4204SJose E. Roman }
333d24d4204SJose E. Roman 
334d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335d71ae5a4SJacob Faibussowitsch {
336d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
340d24d4204SJose E. Roman   PetscBLASInt   one = 1;
341d24d4204SJose E. Roman 
342d24d4204SJose E. Roman   PetscFunctionBegin;
343792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
3453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
346d24d4204SJose E. Roman }
347d24d4204SJose E. Roman 
348d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349d71ae5a4SJacob Faibussowitsch {
350d24d4204SJose E. Roman   PetscFunctionBegin;
3519566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3529566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
3539566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
354d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
3553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
356d24d4204SJose E. Roman }
357d24d4204SJose E. Roman 
358d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359d71ae5a4SJacob Faibussowitsch {
360d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
364d24d4204SJose E. Roman   PetscBLASInt   one = 1;
365d24d4204SJose E. Roman 
366d24d4204SJose E. Roman   PetscFunctionBegin;
367792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
3693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
370d24d4204SJose E. Roman }
371d24d4204SJose E. Roman 
372d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373d71ae5a4SJacob Faibussowitsch {
374d24d4204SJose E. Roman   PetscFunctionBegin;
3759566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
3769566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
3779566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
3783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
379d24d4204SJose E. Roman }
380d24d4204SJose E. Roman 
381d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
382d71ae5a4SJacob Faibussowitsch {
383d24d4204SJose E. Roman   PetscFunctionBegin;
384d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
385d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
3863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
387d24d4204SJose E. Roman }
388d24d4204SJose E. Roman 
389d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
390d71ae5a4SJacob Faibussowitsch {
391d24d4204SJose E. Roman   PetscFunctionBegin;
392d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
393d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
395d24d4204SJose E. Roman }
396d24d4204SJose E. Roman 
397d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
398d71ae5a4SJacob Faibussowitsch {
399d24d4204SJose E. Roman   Mat_Product *product = C->product;
400d24d4204SJose E. Roman 
401d24d4204SJose E. Roman   PetscFunctionBegin;
402d24d4204SJose E. Roman   switch (product->type) {
403d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
404d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
405d71ae5a4SJacob Faibussowitsch     break;
406d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
407d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
408d71ae5a4SJacob Faibussowitsch     break;
409d71ae5a4SJacob Faibussowitsch   default:
410d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
411d24d4204SJose E. Roman   }
4123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
413d24d4204SJose E. Roman }
414d24d4204SJose E. Roman 
415d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
416d71ae5a4SJacob Faibussowitsch {
417d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
418d24d4204SJose E. Roman   PetscScalar    *darray, *d2d, v;
419d24d4204SJose E. Roman   const PetscInt *ranges;
420d24d4204SJose E. Roman   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
421d24d4204SJose E. Roman 
422d24d4204SJose E. Roman   PetscFunctionBegin;
4239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(D, &darray));
424d24d4204SJose E. Roman 
425d24d4204SJose E. Roman   if (A->rmap->N <= A->cmap->N) { /* row version */
426d24d4204SJose E. Roman 
427d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4289566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
4299566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
430d24d4204SJose E. Roman     dlld = PetscMax(1, A->rmap->n);
431792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
432d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
433d24d4204SJose E. Roman 
434d24d4204SJose E. Roman     /* allocate 2d vector */
435d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
4369566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
437d24d4204SJose E. Roman     dlld = PetscMax(1, lszd);
438d24d4204SJose E. Roman 
439d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
440792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
441d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
442d24d4204SJose E. Roman 
443d24d4204SJose E. Roman     /* collect diagonal */
444d24d4204SJose E. Roman     for (j = 1; j <= a->M; j++) {
445792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
446792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
447d24d4204SJose E. Roman     }
448d24d4204SJose E. Roman 
449d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
450792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
4519566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
452d24d4204SJose E. Roman 
453d24d4204SJose E. Roman   } else { /* column version */
454d24d4204SJose E. Roman 
455d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4569566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
4579566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
458d24d4204SJose E. Roman     dlld = 1;
459792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
460d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
461d24d4204SJose E. Roman 
462d24d4204SJose E. Roman     /* allocate 2d vector */
463d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4649566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
465d24d4204SJose E. Roman 
466d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
467792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
468d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
469d24d4204SJose E. Roman 
470d24d4204SJose E. Roman     /* collect diagonal */
471d24d4204SJose E. Roman     for (j = 1; j <= a->N; j++) {
472792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
473792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
474d24d4204SJose E. Roman     }
475d24d4204SJose E. Roman 
476d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
477792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
4789566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
479d24d4204SJose E. Roman   }
480d24d4204SJose E. Roman 
4819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(D, &darray));
4829566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(D));
4839566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(D));
4843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
485d24d4204SJose E. Roman }
486d24d4204SJose E. Roman 
487d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
488d71ae5a4SJacob Faibussowitsch {
489d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
490d24d4204SJose E. Roman   const PetscScalar *d;
491d24d4204SJose E. Roman   const PetscInt    *ranges;
492d24d4204SJose E. Roman   PetscScalar       *d2d;
493d24d4204SJose E. Roman   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
494d24d4204SJose E. Roman 
495d24d4204SJose E. Roman   PetscFunctionBegin;
496d24d4204SJose E. Roman   if (R) {
4979566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
498d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4999566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
5009566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
501d24d4204SJose E. Roman     dlld = 1;
502792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
503d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
504d24d4204SJose E. Roman 
505d24d4204SJose E. Roman     /* allocate 2d vector */
506d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
5079566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
508d24d4204SJose E. Roman 
509d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
510792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
511d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
512d24d4204SJose E. Roman 
513d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
514792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
515d24d4204SJose E. Roman 
516d24d4204SJose E. Roman     /* broadcast along process columns */
517d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
518d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
519d24d4204SJose E. Roman 
520d24d4204SJose E. Roman     /* local scaling */
5219371c9d4SSatish Balay     for (j = 0; j < a->locc; j++)
5229371c9d4SSatish Balay       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
523d24d4204SJose E. Roman 
5249566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
5259566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
526d24d4204SJose E. Roman   }
527d24d4204SJose E. Roman   if (L) {
5289566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
529d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
5309566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
5319566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
532d24d4204SJose E. Roman     dlld = PetscMax(1, A->rmap->n);
533792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
534d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
535d24d4204SJose E. Roman 
536d24d4204SJose E. Roman     /* allocate 2d vector */
537d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
5389566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
539d24d4204SJose E. Roman     dlld = PetscMax(1, lszd);
540d24d4204SJose E. Roman 
541d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
542792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
543d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
544d24d4204SJose E. Roman 
545d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
546792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
547d24d4204SJose E. Roman 
548d24d4204SJose E. Roman     /* broadcast along process rows */
549d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
550d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
551d24d4204SJose E. Roman 
552d24d4204SJose E. Roman     /* local scaling */
5539371c9d4SSatish Balay     for (i = 0; i < a->locr; i++)
5549371c9d4SSatish Balay       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
555d24d4204SJose E. Roman 
5569566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
5579566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
558d24d4204SJose E. Roman   }
5593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
560d24d4204SJose E. Roman }
561d24d4204SJose E. Roman 
562d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
563d71ae5a4SJacob Faibussowitsch {
564d24d4204SJose E. Roman   PetscFunctionBegin;
565d24d4204SJose E. Roman   *missing = PETSC_FALSE;
5663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
567d24d4204SJose E. Roman }
568d24d4204SJose E. Roman 
569d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
570d71ae5a4SJacob Faibussowitsch {
571d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
572d24d4204SJose E. Roman   PetscBLASInt   n, one = 1;
573d24d4204SJose E. Roman 
574d24d4204SJose E. Roman   PetscFunctionBegin;
575d24d4204SJose E. Roman   n = x->lld * x->locc;
576792fecdfSBarry Smith   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
5773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
578d24d4204SJose E. Roman }
579d24d4204SJose E. Roman 
580d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
581d71ae5a4SJacob Faibussowitsch {
582d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
583d24d4204SJose E. Roman   PetscBLASInt   i, n;
584d24d4204SJose E. Roman   PetscScalar    v;
585d24d4204SJose E. Roman 
586d24d4204SJose E. Roman   PetscFunctionBegin;
587d24d4204SJose E. Roman   n = PetscMin(x->M, x->N);
588d24d4204SJose E. Roman   for (i = 1; i <= n; i++) {
589792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
590d24d4204SJose E. Roman     v += alpha;
591792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
592d24d4204SJose E. Roman   }
5933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
594d24d4204SJose E. Roman }
595d24d4204SJose E. Roman 
596d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
597d71ae5a4SJacob Faibussowitsch {
598d24d4204SJose E. Roman   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
599d24d4204SJose E. Roman   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
600d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
601d24d4204SJose E. Roman   PetscScalar    beta = 1.0;
602d24d4204SJose E. Roman 
603d24d4204SJose E. Roman   PetscFunctionBegin;
604d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
605792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
6069566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
6073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
608d24d4204SJose E. Roman }
609d24d4204SJose E. Roman 
610d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
611d71ae5a4SJacob Faibussowitsch {
612d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
613d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
614d24d4204SJose E. Roman 
615d24d4204SJose E. Roman   PetscFunctionBegin;
6169566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
6179566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
6183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
619d24d4204SJose E. Roman }
620d24d4204SJose E. Roman 
621d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
622d71ae5a4SJacob Faibussowitsch {
623d24d4204SJose E. Roman   Mat            Bs;
624d24d4204SJose E. Roman   MPI_Comm       comm;
625d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
626d24d4204SJose E. Roman 
627d24d4204SJose E. Roman   PetscFunctionBegin;
6289566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
6299566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Bs));
6309566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
6319566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bs, MATSCALAPACK));
632d24d4204SJose E. Roman   b       = (Mat_ScaLAPACK *)Bs->data;
633d24d4204SJose E. Roman   b->M    = a->M;
634d24d4204SJose E. Roman   b->N    = a->N;
635d24d4204SJose E. Roman   b->mb   = a->mb;
636d24d4204SJose E. Roman   b->nb   = a->nb;
637d24d4204SJose E. Roman   b->rsrc = a->rsrc;
638d24d4204SJose E. Roman   b->csrc = a->csrc;
6399566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Bs));
640d24d4204SJose E. Roman   *B = Bs;
64148a46eb9SPierre Jolivet   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
642d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
644d24d4204SJose E. Roman }
645d24d4204SJose E. Roman 
646d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
647d71ae5a4SJacob Faibussowitsch {
648d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
649d24d4204SJose E. Roman   Mat            Bs   = *B;
650d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
651d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
652d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
653d24d4204SJose E. Roman   PetscInt i, j;
654d24d4204SJose E. Roman #endif
655d24d4204SJose E. Roman 
656d24d4204SJose E. Roman   PetscFunctionBegin;
6577fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
6580fdf79fbSJacob Faibussowitsch   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
6599566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
660d24d4204SJose E. Roman   *B = Bs;
661d24d4204SJose E. Roman   b  = (Mat_ScaLAPACK *)Bs->data;
662792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
663d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
664d24d4204SJose E. Roman   /* undo conjugation */
6659371c9d4SSatish Balay   for (i = 0; i < b->locr; i++)
6669371c9d4SSatish Balay     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
667d24d4204SJose E. Roman #endif
668d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
670d24d4204SJose E. Roman }
671d24d4204SJose E. Roman 
672d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
673d71ae5a4SJacob Faibussowitsch {
674d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
675d24d4204SJose E. Roman   PetscInt       i, j;
676d24d4204SJose E. Roman 
677d24d4204SJose E. Roman   PetscFunctionBegin;
6789371c9d4SSatish Balay   for (i = 0; i < a->locr; i++)
6799371c9d4SSatish Balay     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
6803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
681d24d4204SJose E. Roman }
682d24d4204SJose E. Roman 
683d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
684d71ae5a4SJacob Faibussowitsch {
685d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
686d24d4204SJose E. Roman   Mat            Bs   = *B;
687d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
688d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
689d24d4204SJose E. Roman 
690d24d4204SJose E. Roman   PetscFunctionBegin;
6910fdf79fbSJacob Faibussowitsch   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
6929566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
693d24d4204SJose E. Roman   *B = Bs;
694d24d4204SJose E. Roman   b  = (Mat_ScaLAPACK *)Bs->data;
695792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
696d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
698d24d4204SJose E. Roman }
699d24d4204SJose E. Roman 
700d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
701d71ae5a4SJacob Faibussowitsch {
702d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
703d24d4204SJose E. Roman   PetscScalar    *x, *x2d;
704d24d4204SJose E. Roman   const PetscInt *ranges;
705d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
706d24d4204SJose E. Roman 
707d24d4204SJose E. Roman   PetscFunctionBegin;
7089566063dSJacob Faibussowitsch   PetscCall(VecCopy(B, X));
7099566063dSJacob Faibussowitsch   PetscCall(VecGetArray(X, &x));
710d24d4204SJose E. Roman 
711d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
7129566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
7139566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
714d24d4204SJose E. Roman   xlld = PetscMax(1, A->rmap->n);
715792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
716d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
717d24d4204SJose E. Roman 
718d24d4204SJose E. Roman   /* allocate 2d vector */
719d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
7209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(lszx, &x2d));
721d24d4204SJose E. Roman   xlld = PetscMax(1, lszx);
722d24d4204SJose E. Roman 
723d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
724792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
725d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
726d24d4204SJose E. Roman 
727d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
728792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
729d24d4204SJose E. Roman 
730d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
731d24d4204SJose E. Roman   switch (A->factortype) {
732d24d4204SJose E. Roman   case MAT_FACTOR_LU:
733792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
734d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
735d24d4204SJose E. Roman     break;
736d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
737792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
738d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
739d24d4204SJose E. Roman     break;
740d71ae5a4SJacob Faibussowitsch   default:
741d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
742d24d4204SJose E. Roman   }
743d24d4204SJose E. Roman 
744d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
745792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
746d24d4204SJose E. Roman 
7479566063dSJacob Faibussowitsch   PetscCall(PetscFree(x2d));
7489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(X, &x));
7493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
750d24d4204SJose E. Roman }
751d24d4204SJose E. Roman 
752d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
753d71ae5a4SJacob Faibussowitsch {
754d24d4204SJose E. Roman   PetscFunctionBegin;
7559566063dSJacob Faibussowitsch   PetscCall(MatSolve_ScaLAPACK(A, B, X));
7569566063dSJacob Faibussowitsch   PetscCall(VecAXPY(X, 1, Y));
7573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
758d24d4204SJose E. Roman }
759d24d4204SJose E. Roman 
760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
761d71ae5a4SJacob Faibussowitsch {
762d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
763d24d4204SJose E. Roman   PetscBool      flg1, flg2;
764d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
765d24d4204SJose E. Roman 
766d24d4204SJose E. Roman   PetscFunctionBegin;
7679566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
7689566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
76908401ef6SPierre Jolivet   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
770d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B, 1, X, 2);
771d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)B->data;
772d24d4204SJose E. Roman   x = (Mat_ScaLAPACK *)X->data;
7739566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
774d24d4204SJose E. Roman 
775d24d4204SJose E. Roman   switch (A->factortype) {
776d24d4204SJose E. Roman   case MAT_FACTOR_LU:
777792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
778d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
779d24d4204SJose E. Roman     break;
780d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
781792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
782d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
783d24d4204SJose E. Roman     break;
784d71ae5a4SJacob Faibussowitsch   default:
785d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
786d24d4204SJose E. Roman   }
7873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
788d24d4204SJose E. Roman }
789d24d4204SJose E. Roman 
790d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
791d71ae5a4SJacob Faibussowitsch {
792d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
793d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
794d24d4204SJose E. Roman 
795d24d4204SJose E. Roman   PetscFunctionBegin;
796aa624791SPierre Jolivet   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
797792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
798d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf", info);
799d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
800d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
801d24d4204SJose E. Roman 
8029566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
8039566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
805d24d4204SJose E. Roman }
806d24d4204SJose E. Roman 
807d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
808d71ae5a4SJacob Faibussowitsch {
809d24d4204SJose E. Roman   PetscFunctionBegin;
8109566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8119566063dSJacob Faibussowitsch   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
8123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
813d24d4204SJose E. Roman }
814d24d4204SJose E. Roman 
815d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
816d71ae5a4SJacob Faibussowitsch {
817d24d4204SJose E. Roman   PetscFunctionBegin;
818d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
820d24d4204SJose E. Roman }
821d24d4204SJose E. Roman 
822d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
823d71ae5a4SJacob Faibussowitsch {
824d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
825d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
826d24d4204SJose E. Roman 
827d24d4204SJose E. Roman   PetscFunctionBegin;
828792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
829d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf", info);
830d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
831d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
832d24d4204SJose E. Roman 
8339566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
8349566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
836d24d4204SJose E. Roman }
837d24d4204SJose E. Roman 
838d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
839d71ae5a4SJacob Faibussowitsch {
840d24d4204SJose E. Roman   PetscFunctionBegin;
8419566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8429566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
8433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
844d24d4204SJose E. Roman }
845d24d4204SJose E. Roman 
846d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
847d71ae5a4SJacob Faibussowitsch {
848d24d4204SJose E. Roman   PetscFunctionBegin;
849d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
851d24d4204SJose E. Roman }
852d24d4204SJose E. Roman 
85366976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
854d71ae5a4SJacob Faibussowitsch {
855d24d4204SJose E. Roman   PetscFunctionBegin;
856d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
8573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
858d24d4204SJose E. Roman }
859d24d4204SJose E. Roman 
860d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
861d71ae5a4SJacob Faibussowitsch {
862d24d4204SJose E. Roman   Mat            B;
86359172f18SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
864d24d4204SJose E. Roman 
865d24d4204SJose E. Roman   PetscFunctionBegin;
866d24d4204SJose E. Roman   /* Create the factorization matrix */
8679566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
86866e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
869d24d4204SJose E. Roman   B->factortype      = ftype;
8709566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
8719566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
872d24d4204SJose E. Roman 
8739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
874d24d4204SJose E. Roman   *F = B;
8753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
876d24d4204SJose E. Roman }
877d24d4204SJose E. Roman 
878d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
879d71ae5a4SJacob Faibussowitsch {
880d24d4204SJose E. Roman   PetscFunctionBegin;
8819566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
8829566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
8833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
884d24d4204SJose E. Roman }
885d24d4204SJose E. Roman 
886d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
887d71ae5a4SJacob Faibussowitsch {
888d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
889d24d4204SJose E. Roman   PetscBLASInt   one = 1, lwork = 0;
890d24d4204SJose E. Roman   const char    *ntype;
891d68f4f38SPierre Jolivet   PetscScalar   *work = NULL, dummy;
892d24d4204SJose E. Roman 
893d24d4204SJose E. Roman   PetscFunctionBegin;
894d24d4204SJose E. Roman   switch (type) {
895d24d4204SJose E. Roman   case NORM_1:
896d24d4204SJose E. Roman     ntype = "1";
897d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
898d24d4204SJose E. Roman     break;
899d24d4204SJose E. Roman   case NORM_FROBENIUS:
900d24d4204SJose E. Roman     ntype = "F";
901d24d4204SJose E. Roman     work  = &dummy;
902d24d4204SJose E. Roman     break;
903d24d4204SJose E. Roman   case NORM_INFINITY:
904d24d4204SJose E. Roman     ntype = "I";
905d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
906d24d4204SJose E. Roman     break;
907d71ae5a4SJacob Faibussowitsch   default:
908d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
909d24d4204SJose E. Roman   }
9109566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
911d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
9129566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscFree(work));
9133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
914d24d4204SJose E. Roman }
915d24d4204SJose E. Roman 
916d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
917d71ae5a4SJacob Faibussowitsch {
918d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
919d24d4204SJose E. Roman 
920d24d4204SJose E. Roman   PetscFunctionBegin;
9219566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
9223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
923d24d4204SJose E. Roman }
924d24d4204SJose E. Roman 
925d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
926d71ae5a4SJacob Faibussowitsch {
927d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
928d24d4204SJose E. Roman   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
929d24d4204SJose E. Roman 
930d24d4204SJose E. Roman   PetscFunctionBegin;
931d24d4204SJose E. Roman   if (rows) {
932d24d4204SJose E. Roman     n     = a->locr;
933d24d4204SJose E. Roman     nb    = a->mb;
934d24d4204SJose E. Roman     isrc  = a->rsrc;
935d24d4204SJose E. Roman     nproc = a->grid->nprow;
936d24d4204SJose E. Roman     iproc = a->grid->myrow;
9379566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
938d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9399566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
940d24d4204SJose E. Roman   }
941d24d4204SJose E. Roman   if (cols) {
942d24d4204SJose E. Roman     n     = a->locc;
943d24d4204SJose E. Roman     nb    = a->nb;
944d24d4204SJose E. Roman     isrc  = a->csrc;
945d24d4204SJose E. Roman     nproc = a->grid->npcol;
946d24d4204SJose E. Roman     iproc = a->grid->mycol;
9479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
948d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9499566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
950d24d4204SJose E. Roman   }
9513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
952d24d4204SJose E. Roman }
953d24d4204SJose E. Roman 
954d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
955d71ae5a4SJacob Faibussowitsch {
956d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
957d24d4204SJose E. Roman   Mat                Bmpi;
958d24d4204SJose E. Roman   MPI_Comm           comm;
959a00b6df3SJose E. Roman   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
9604b1a79daSJose E. Roman   const PetscInt    *ranges, *branges, *cwork;
9614b1a79daSJose E. Roman   const PetscScalar *vwork;
962d24d4204SJose E. Roman   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
963d24d4204SJose E. Roman   PetscScalar       *barray;
9644b1a79daSJose E. Roman   PetscBool          differ = PETSC_FALSE;
9654b1a79daSJose E. Roman   PetscMPIInt        size;
966d24d4204SJose E. Roman 
967d24d4204SJose E. Roman   PetscFunctionBegin;
9689566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
9704b1a79daSJose E. Roman 
9714b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
9739566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
9749371c9d4SSatish Balay     for (i = 0; i < size; i++)
9759371c9d4SSatish Balay       if (ranges[i + 1] != branges[i + 1]) {
9769371c9d4SSatish Balay         differ = PETSC_TRUE;
9779371c9d4SSatish Balay         break;
9789371c9d4SSatish Balay       }
9794b1a79daSJose E. Roman   }
9804b1a79daSJose E. Roman 
9814b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
9829566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
9834b1a79daSJose E. Roman     m = PETSC_DECIDE;
9849566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
9854b1a79daSJose E. Roman     n = PETSC_DECIDE;
9869566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
9879566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
9889566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATDENSE));
9899566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
9904b1a79daSJose E. Roman 
9914b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9929566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
993a00b6df3SJose E. Roman     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
994a00b6df3SJose E. Roman     lld = PetscMax(ldb, 1); /* local leading dimension */
995792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
9964b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
9974b1a79daSJose E. Roman 
9984b1a79daSJose E. Roman     /* redistribute matrix */
9999566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
1000792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10019566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
10029566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10039566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
10044b1a79daSJose E. Roman 
10054b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
10069566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
10074b1a79daSJose E. Roman     for (i = rstart; i < rend; i++) {
10089566063dSJacob Faibussowitsch       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
10099566063dSJacob Faibussowitsch       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
10109566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
10114b1a79daSJose E. Roman     }
10129566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
10139566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
10149566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Bmpi));
10154b1a79daSJose E. Roman 
10164b1a79daSJose E. Roman   } else { /* normal cases */
1017d24d4204SJose E. Roman 
1018d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1019d24d4204SJose E. Roman     else {
10209566063dSJacob Faibussowitsch       PetscCall(MatCreate(comm, &Bmpi));
102192c846b4SJose E. Roman       m = PETSC_DECIDE;
10229566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
102392c846b4SJose E. Roman       n = PETSC_DECIDE;
10249566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10259566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10269566063dSJacob Faibussowitsch       PetscCall(MatSetType(Bmpi, MATDENSE));
10279566063dSJacob Faibussowitsch       PetscCall(MatSetUp(Bmpi));
1028d24d4204SJose E. Roman     }
1029d24d4204SJose E. Roman 
1030d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
10319566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1032a00b6df3SJose E. Roman     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1033a00b6df3SJose E. Roman     lld = PetscMax(ldb, 1); /* local leading dimension */
1034792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1035d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1036d24d4204SJose E. Roman 
1037d24d4204SJose E. Roman     /* redistribute matrix */
10389566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
1039792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10409566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1041d24d4204SJose E. Roman 
10429566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10439566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1044d24d4204SJose E. Roman     if (reuse == MAT_INPLACE_MATRIX) {
10459566063dSJacob Faibussowitsch       PetscCall(MatHeaderReplace(A, &Bmpi));
1046d24d4204SJose E. Roman     } else *B = Bmpi;
10474b1a79daSJose E. Roman   }
10483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1049d24d4204SJose E. Roman }
1050d24d4204SJose E. Roman 
1051d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1052d71ae5a4SJacob Faibussowitsch {
1053b12397e7SPierre Jolivet   const PetscInt *ranges;
1054b12397e7SPierre Jolivet   PetscMPIInt     size;
1055b12397e7SPierre Jolivet   PetscInt        i, n;
1056b12397e7SPierre Jolivet 
1057b12397e7SPierre Jolivet   PetscFunctionBegin;
1058b12397e7SPierre Jolivet   *correct = PETSC_TRUE;
1059b12397e7SPierre Jolivet   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1060b12397e7SPierre Jolivet   if (size > 1) {
1061b12397e7SPierre Jolivet     PetscCall(PetscLayoutGetRanges(map, &ranges));
1062b12397e7SPierre Jolivet     n = ranges[1] - ranges[0];
10639371c9d4SSatish Balay     for (i = 1; i < size; i++)
10649371c9d4SSatish Balay       if (ranges[i + 1] - ranges[i] != n) break;
1065b12397e7SPierre Jolivet     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1066b12397e7SPierre Jolivet   }
10673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1068b12397e7SPierre Jolivet }
1069b12397e7SPierre Jolivet 
1070d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1071d71ae5a4SJacob Faibussowitsch {
1072d24d4204SJose E. Roman   Mat_ScaLAPACK  *b;
1073d24d4204SJose E. Roman   Mat             Bmpi;
1074d24d4204SJose E. Roman   MPI_Comm        comm;
107592c846b4SJose E. Roman   PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
1076b12397e7SPierre Jolivet   const PetscInt *ranges, *rows, *cols;
1077d24d4204SJose E. Roman   PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
1078d24d4204SJose E. Roman   PetscScalar    *aarray;
1079b12397e7SPierre Jolivet   IS              ir, ic;
10804e8b52a1SJose E. Roman   PetscInt        lda;
1081b12397e7SPierre Jolivet   PetscBool       flg;
1082d24d4204SJose E. Roman 
1083d24d4204SJose E. Roman   PetscFunctionBegin;
10849566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1085d24d4204SJose E. Roman 
1086d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1087d24d4204SJose E. Roman   else {
10889566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
108992c846b4SJose E. Roman     m = PETSC_DECIDE;
10909566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
109192c846b4SJose E. Roman     n = PETSC_DECIDE;
10929566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10939566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10949566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
10959566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
1096d24d4204SJose E. Roman   }
1097d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)Bmpi->data;
1098d24d4204SJose E. Roman 
1099b12397e7SPierre Jolivet   PetscCall(MatDenseGetLDA(A, &lda));
1100b12397e7SPierre Jolivet   PetscCall(MatDenseGetArray(A, &aarray));
1101b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1102b12397e7SPierre Jolivet   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1103b12397e7SPierre Jolivet   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1104d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for A (1d block distribution) */
11059566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
11069566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
11074e8b52a1SJose E. Roman     lld = PetscMax(lda, 1);                       /* local leading dimension */
1108792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1109d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1110d24d4204SJose E. Roman 
1111d24d4204SJose E. Roman     /* redistribute matrix */
1112792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1113b12397e7SPierre Jolivet     Bmpi->nooffprocentries = PETSC_TRUE;
1114b12397e7SPierre Jolivet   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1115b12397e7SPierre Jolivet     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1116b12397e7SPierre Jolivet     b->roworiented = PETSC_FALSE;
1117b12397e7SPierre Jolivet     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1118b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ir, &rows));
1119b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ic, &cols));
1120b12397e7SPierre Jolivet     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1121b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ir, &rows));
1122b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ic, &cols));
1123b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ic));
1124b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ir));
1125b12397e7SPierre Jolivet   }
11269566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A, &aarray));
11279566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
11289566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1129d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
11309566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &Bmpi));
1131d24d4204SJose E. Roman   } else *B = Bmpi;
11323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1133d24d4204SJose E. Roman }
1134d24d4204SJose E. Roman 
1135d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1136d71ae5a4SJacob Faibussowitsch {
1137d24d4204SJose E. Roman   Mat                mat_scal;
1138d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1139d24d4204SJose E. Roman   const PetscInt    *cols;
1140d24d4204SJose E. Roman   const PetscScalar *vals;
1141d24d4204SJose E. Roman 
1142d24d4204SJose E. Roman   PetscFunctionBegin;
1143d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1144d24d4204SJose E. Roman     mat_scal = *newmat;
11459566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1146d24d4204SJose E. Roman   } else {
11479566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1148d24d4204SJose E. Roman     m = PETSC_DECIDE;
11499566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1150d24d4204SJose E. Roman     n = PETSC_DECIDE;
11519566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
11529566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
11539566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
11549566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1155d24d4204SJose E. Roman   }
1156d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
11579566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11589566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
11599566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1160d24d4204SJose E. Roman   }
11619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
11629566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1163d24d4204SJose E. Roman 
11649566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1165d24d4204SJose E. Roman   else *newmat = mat_scal;
11663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1167d24d4204SJose E. Roman }
1168d24d4204SJose E. Roman 
1169d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1170d71ae5a4SJacob Faibussowitsch {
1171d24d4204SJose E. Roman   Mat                mat_scal;
1172d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1173d24d4204SJose E. Roman   const PetscInt    *cols;
1174d24d4204SJose E. Roman   const PetscScalar *vals;
1175d24d4204SJose E. Roman   PetscScalar        v;
1176d24d4204SJose E. Roman 
1177d24d4204SJose E. Roman   PetscFunctionBegin;
1178d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1179d24d4204SJose E. Roman     mat_scal = *newmat;
11809566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1181d24d4204SJose E. Roman   } else {
11829566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1183d24d4204SJose E. Roman     m = PETSC_DECIDE;
11849566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1185d24d4204SJose E. Roman     n = PETSC_DECIDE;
11869566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
11879566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
11889566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
11899566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1190d24d4204SJose E. Roman   }
11919566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
1192d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
11939566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11949566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1195d24d4204SJose E. Roman     for (j = 0; j < ncols; j++) { /* lower triangular part */
1196d24d4204SJose E. Roman       if (cols[j] == row) continue;
1197b94d7dedSBarry Smith       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
11989566063dSJacob Faibussowitsch       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1199d24d4204SJose E. Roman     }
12009566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1201d24d4204SJose E. Roman   }
12029566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
12039566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
12049566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1205d24d4204SJose E. Roman 
12069566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1207d24d4204SJose E. Roman   else *newmat = mat_scal;
12083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1209d24d4204SJose E. Roman }
1210d24d4204SJose E. Roman 
1211d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1212d71ae5a4SJacob Faibussowitsch {
1213d24d4204SJose E. Roman   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1214d24d4204SJose E. Roman   PetscInt       sz = 0;
1215d24d4204SJose E. Roman 
1216d24d4204SJose E. Roman   PetscFunctionBegin;
12179566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12189566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1219d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1220d24d4204SJose E. Roman 
12219566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
12229566063dSJacob Faibussowitsch   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
12239566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(sz, &a->loc));
1224d24d4204SJose E. Roman 
1225d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
12263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1227d24d4204SJose E. Roman }
1228d24d4204SJose E. Roman 
1229d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1230d71ae5a4SJacob Faibussowitsch {
1231d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1232d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1233d24d4204SJose E. Roman   PetscBool           flg;
1234d24d4204SJose E. Roman   MPI_Comm            icomm;
1235d24d4204SJose E. Roman 
1236d24d4204SJose E. Roman   PetscFunctionBegin;
12379566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&A->stash));
12389566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
12399566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->pivots));
12409566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
12419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1242d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1243d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1244d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1245d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
12469566063dSJacob Faibussowitsch     PetscCall(PetscFree(grid));
12479566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1248d24d4204SJose E. Roman   }
12499566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
12509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
12519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
12529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
12539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
12549566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
12553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1256d24d4204SJose E. Roman }
1257d24d4204SJose E. Roman 
125866976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1259d71ae5a4SJacob Faibussowitsch {
1260d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1261d24d4204SJose E. Roman   PetscBLASInt   info = 0;
1262b12397e7SPierre Jolivet   PetscBool      flg;
1263d24d4204SJose E. Roman 
1264d24d4204SJose E. Roman   PetscFunctionBegin;
12659566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12669566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1267d24d4204SJose E. Roman 
1268b12397e7SPierre Jolivet   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1269b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1270b12397e7SPierre Jolivet   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1271b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1272b12397e7SPierre Jolivet   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1273d24d4204SJose E. Roman 
1274d24d4204SJose E. Roman   /* compute local sizes */
12759566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
12769566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1277d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1278d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1279d24d4204SJose E. Roman   a->lld  = PetscMax(1, a->locr);
1280d24d4204SJose E. Roman 
1281d24d4204SJose E. Roman   /* allocate local array */
12829566063dSJacob Faibussowitsch   PetscCall(MatScaLAPACKSetPreallocation(A));
1283d24d4204SJose E. Roman 
1284d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1285792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1286d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
12873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1288d24d4204SJose E. Roman }
1289d24d4204SJose E. Roman 
129066976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1291d71ae5a4SJacob Faibussowitsch {
1292d24d4204SJose E. Roman   PetscInt nstash, reallocs;
1293d24d4204SJose E. Roman 
1294d24d4204SJose E. Roman   PetscFunctionBegin;
12953ba16761SJacob Faibussowitsch   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
12969566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
12979566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
12989566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
12993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1300d24d4204SJose E. Roman }
1301d24d4204SJose E. Roman 
130266976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1303d71ae5a4SJacob Faibussowitsch {
1304d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1305d24d4204SJose E. Roman   PetscMPIInt    n;
1306d24d4204SJose E. Roman   PetscInt       i, flg, *row, *col;
1307d24d4204SJose E. Roman   PetscScalar   *val;
1308d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1309d24d4204SJose E. Roman 
1310d24d4204SJose E. Roman   PetscFunctionBegin;
13113ba16761SJacob Faibussowitsch   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1312d24d4204SJose E. Roman   while (1) {
13139566063dSJacob Faibussowitsch     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1314d24d4204SJose E. Roman     if (!flg) break;
1315d24d4204SJose E. Roman     for (i = 0; i < n; i++) {
13169566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
13179566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1318792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1319aed4548fSBarry Smith       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1320d24d4204SJose E. Roman       switch (A->insertmode) {
1321d71ae5a4SJacob Faibussowitsch       case INSERT_VALUES:
1322d71ae5a4SJacob Faibussowitsch         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1323d71ae5a4SJacob Faibussowitsch         break;
1324d71ae5a4SJacob Faibussowitsch       case ADD_VALUES:
1325d71ae5a4SJacob Faibussowitsch         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1326d71ae5a4SJacob Faibussowitsch         break;
1327d71ae5a4SJacob Faibussowitsch       default:
1328d71ae5a4SJacob Faibussowitsch         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1329d24d4204SJose E. Roman       }
1330d24d4204SJose E. Roman     }
1331d24d4204SJose E. Roman   }
13329566063dSJacob Faibussowitsch   PetscCall(MatStashScatterEnd_Private(&A->stash));
13333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1334d24d4204SJose E. Roman }
1335d24d4204SJose E. Roman 
133666976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1337d71ae5a4SJacob Faibussowitsch {
1338d24d4204SJose E. Roman   Mat      Adense, As;
1339d24d4204SJose E. Roman   MPI_Comm comm;
1340d24d4204SJose E. Roman 
1341d24d4204SJose E. Roman   PetscFunctionBegin;
13429566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
13439566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Adense));
13449566063dSJacob Faibussowitsch   PetscCall(MatSetType(Adense, MATDENSE));
13459566063dSJacob Faibussowitsch   PetscCall(MatLoad(Adense, viewer));
13469566063dSJacob Faibussowitsch   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
13479566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
13489566063dSJacob Faibussowitsch   PetscCall(MatHeaderReplace(newMat, &As));
13493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1350d24d4204SJose E. Roman }
1351d24d4204SJose E. Roman 
13529371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1353d24d4204SJose E. Roman                                        0,
1354d24d4204SJose E. Roman                                        0,
1355d24d4204SJose E. Roman                                        MatMult_ScaLAPACK,
1356d24d4204SJose E. Roman                                        /* 4*/ MatMultAdd_ScaLAPACK,
1357d24d4204SJose E. Roman                                        MatMultTranspose_ScaLAPACK,
1358d24d4204SJose E. Roman                                        MatMultTransposeAdd_ScaLAPACK,
1359d24d4204SJose E. Roman                                        MatSolve_ScaLAPACK,
1360d24d4204SJose E. Roman                                        MatSolveAdd_ScaLAPACK,
1361d24d4204SJose E. Roman                                        0,
1362d24d4204SJose E. Roman                                        /*10*/ 0,
1363d24d4204SJose E. Roman                                        MatLUFactor_ScaLAPACK,
1364d24d4204SJose E. Roman                                        MatCholeskyFactor_ScaLAPACK,
1365d24d4204SJose E. Roman                                        0,
1366d24d4204SJose E. Roman                                        MatTranspose_ScaLAPACK,
1367d24d4204SJose E. Roman                                        /*15*/ MatGetInfo_ScaLAPACK,
1368d24d4204SJose E. Roman                                        0,
1369d24d4204SJose E. Roman                                        MatGetDiagonal_ScaLAPACK,
1370d24d4204SJose E. Roman                                        MatDiagonalScale_ScaLAPACK,
1371d24d4204SJose E. Roman                                        MatNorm_ScaLAPACK,
1372d24d4204SJose E. Roman                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1373d24d4204SJose E. Roman                                        MatAssemblyEnd_ScaLAPACK,
1374d24d4204SJose E. Roman                                        MatSetOption_ScaLAPACK,
1375d24d4204SJose E. Roman                                        MatZeroEntries_ScaLAPACK,
1376d24d4204SJose E. Roman                                        /*24*/ 0,
1377d24d4204SJose E. Roman                                        MatLUFactorSymbolic_ScaLAPACK,
1378d24d4204SJose E. Roman                                        MatLUFactorNumeric_ScaLAPACK,
1379d24d4204SJose E. Roman                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1380d24d4204SJose E. Roman                                        MatCholeskyFactorNumeric_ScaLAPACK,
1381d24d4204SJose E. Roman                                        /*29*/ MatSetUp_ScaLAPACK,
1382d24d4204SJose E. Roman                                        0,
1383d24d4204SJose E. Roman                                        0,
1384d24d4204SJose E. Roman                                        0,
1385d24d4204SJose E. Roman                                        0,
1386d24d4204SJose E. Roman                                        /*34*/ MatDuplicate_ScaLAPACK,
1387d24d4204SJose E. Roman                                        0,
1388d24d4204SJose E. Roman                                        0,
1389d24d4204SJose E. Roman                                        0,
1390d24d4204SJose E. Roman                                        0,
1391d24d4204SJose E. Roman                                        /*39*/ MatAXPY_ScaLAPACK,
1392d24d4204SJose E. Roman                                        0,
1393d24d4204SJose E. Roman                                        0,
1394d24d4204SJose E. Roman                                        0,
1395d24d4204SJose E. Roman                                        MatCopy_ScaLAPACK,
1396d24d4204SJose E. Roman                                        /*44*/ 0,
1397d24d4204SJose E. Roman                                        MatScale_ScaLAPACK,
1398d24d4204SJose E. Roman                                        MatShift_ScaLAPACK,
1399d24d4204SJose E. Roman                                        0,
1400d24d4204SJose E. Roman                                        0,
1401d24d4204SJose E. Roman                                        /*49*/ 0,
1402d24d4204SJose E. Roman                                        0,
1403d24d4204SJose E. Roman                                        0,
1404d24d4204SJose E. Roman                                        0,
1405d24d4204SJose E. Roman                                        0,
1406d24d4204SJose E. Roman                                        /*54*/ 0,
1407d24d4204SJose E. Roman                                        0,
1408d24d4204SJose E. Roman                                        0,
1409d24d4204SJose E. Roman                                        0,
1410d24d4204SJose E. Roman                                        0,
1411d24d4204SJose E. Roman                                        /*59*/ 0,
1412d24d4204SJose E. Roman                                        MatDestroy_ScaLAPACK,
1413d24d4204SJose E. Roman                                        MatView_ScaLAPACK,
1414d24d4204SJose E. Roman                                        0,
1415d24d4204SJose E. Roman                                        0,
1416d24d4204SJose E. Roman                                        /*64*/ 0,
1417d24d4204SJose E. Roman                                        0,
1418d24d4204SJose E. Roman                                        0,
1419d24d4204SJose E. Roman                                        0,
1420d24d4204SJose E. Roman                                        0,
1421d24d4204SJose E. Roman                                        /*69*/ 0,
1422d24d4204SJose E. Roman                                        0,
1423d24d4204SJose E. Roman                                        MatConvert_ScaLAPACK_Dense,
1424d24d4204SJose E. Roman                                        0,
1425d24d4204SJose E. Roman                                        0,
1426d24d4204SJose E. Roman                                        /*74*/ 0,
1427d24d4204SJose E. Roman                                        0,
1428d24d4204SJose E. Roman                                        0,
1429d24d4204SJose E. Roman                                        0,
1430d24d4204SJose E. Roman                                        0,
1431d24d4204SJose E. Roman                                        /*79*/ 0,
1432d24d4204SJose E. Roman                                        0,
1433d24d4204SJose E. Roman                                        0,
1434d24d4204SJose E. Roman                                        0,
1435d24d4204SJose E. Roman                                        MatLoad_ScaLAPACK,
1436d24d4204SJose E. Roman                                        /*84*/ 0,
1437d24d4204SJose E. Roman                                        0,
1438d24d4204SJose E. Roman                                        0,
1439d24d4204SJose E. Roman                                        0,
1440d24d4204SJose E. Roman                                        0,
1441d24d4204SJose E. Roman                                        /*89*/ 0,
1442d24d4204SJose E. Roman                                        0,
1443d24d4204SJose E. Roman                                        MatMatMultNumeric_ScaLAPACK,
1444d24d4204SJose E. Roman                                        0,
1445d24d4204SJose E. Roman                                        0,
1446d24d4204SJose E. Roman                                        /*94*/ 0,
1447d24d4204SJose E. Roman                                        0,
1448d24d4204SJose E. Roman                                        0,
1449d24d4204SJose E. Roman                                        MatMatTransposeMultNumeric_ScaLAPACK,
1450d24d4204SJose E. Roman                                        0,
1451d24d4204SJose E. Roman                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1452d24d4204SJose E. Roman                                        0,
1453d24d4204SJose E. Roman                                        0,
1454d24d4204SJose E. Roman                                        MatConjugate_ScaLAPACK,
1455d24d4204SJose E. Roman                                        0,
1456d24d4204SJose E. Roman                                        /*104*/ 0,
1457d24d4204SJose E. Roman                                        0,
1458d24d4204SJose E. Roman                                        0,
1459d24d4204SJose E. Roman                                        0,
1460d24d4204SJose E. Roman                                        0,
1461d24d4204SJose E. Roman                                        /*109*/ MatMatSolve_ScaLAPACK,
1462d24d4204SJose E. Roman                                        0,
1463d24d4204SJose E. Roman                                        0,
1464d24d4204SJose E. Roman                                        0,
1465d24d4204SJose E. Roman                                        MatMissingDiagonal_ScaLAPACK,
1466d24d4204SJose E. Roman                                        /*114*/ 0,
1467d24d4204SJose E. Roman                                        0,
1468d24d4204SJose E. Roman                                        0,
1469d24d4204SJose E. Roman                                        0,
1470d24d4204SJose E. Roman                                        0,
1471d24d4204SJose E. Roman                                        /*119*/ 0,
1472d24d4204SJose E. Roman                                        MatHermitianTranspose_ScaLAPACK,
1473*5e4d33a3SBlanca Mellado Pinto                                        MatMultHermitianTranspose_ScaLAPACK,
1474*5e4d33a3SBlanca Mellado Pinto                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1475d24d4204SJose E. Roman                                        0,
1476d24d4204SJose E. Roman                                        /*124*/ 0,
1477d24d4204SJose E. Roman                                        0,
1478d24d4204SJose E. Roman                                        0,
1479d24d4204SJose E. Roman                                        0,
1480d24d4204SJose E. Roman                                        0,
1481d24d4204SJose E. Roman                                        /*129*/ 0,
1482d24d4204SJose E. Roman                                        0,
1483d24d4204SJose E. Roman                                        0,
1484d24d4204SJose E. Roman                                        0,
1485d24d4204SJose E. Roman                                        0,
1486d24d4204SJose E. Roman                                        /*134*/ 0,
1487d24d4204SJose E. Roman                                        0,
1488d24d4204SJose E. Roman                                        0,
1489d24d4204SJose E. Roman                                        0,
1490d24d4204SJose E. Roman                                        0,
1491d24d4204SJose E. Roman                                        0,
1492d24d4204SJose E. Roman                                        /*140*/ 0,
1493d24d4204SJose E. Roman                                        0,
1494d24d4204SJose E. Roman                                        0,
1495d24d4204SJose E. Roman                                        0,
1496d24d4204SJose E. Roman                                        0,
1497d24d4204SJose E. Roman                                        /*145*/ 0,
1498d24d4204SJose E. Roman                                        0,
149999a7f59eSMark Adams                                        0,
150099a7f59eSMark Adams                                        0,
15017fb60732SBarry Smith                                        0,
1502ed6168d8SPierre Jolivet                                        /*150*/ 0,
1503ed6168d8SPierre Jolivet                                        0};
1504d24d4204SJose E. Roman 
1505d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1506d71ae5a4SJacob Faibussowitsch {
1507d24d4204SJose E. Roman   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1508d24d4204SJose E. Roman   PetscInt           size = stash->size, nsends;
1509d24d4204SJose E. Roman   PetscInt           count, *sindices, **rindices, i, j, l;
1510d24d4204SJose E. Roman   PetscScalar      **rvalues, *svalues;
1511d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1512d24d4204SJose E. Roman   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1513d24d4204SJose E. Roman   PetscMPIInt       *sizes, *nlengths, nreceives;
1514d24d4204SJose E. Roman   PetscInt          *sp_idx, *sp_idy;
1515d24d4204SJose E. Roman   PetscScalar       *sp_val;
1516d24d4204SJose E. Roman   PetscMatStashSpace space, space_next;
1517d24d4204SJose E. Roman   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1518d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1519d24d4204SJose E. Roman 
1520d24d4204SJose E. Roman   PetscFunctionBegin;
1521d24d4204SJose E. Roman   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1522d24d4204SJose E. Roman     InsertMode addv;
15231c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
152408401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1525d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1526d24d4204SJose E. Roman   }
1527d24d4204SJose E. Roman 
1528d24d4204SJose E. Roman   bs2 = stash->bs * stash->bs;
1529d24d4204SJose E. Roman 
1530d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
15319566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
15329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1533d24d4204SJose E. Roman 
1534d24d4204SJose E. Roman   i = j = 0;
1535d24d4204SJose E. Roman   space = stash->space_head;
1536d24d4204SJose E. Roman   while (space) {
1537d24d4204SJose E. Roman     space_next = space->next;
1538d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
15399566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
15409566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1541792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1542d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
15439371c9d4SSatish Balay       nlengths[j]++;
15449371c9d4SSatish Balay       owner[i] = j;
1545d24d4204SJose E. Roman       i++;
1546d24d4204SJose E. Roman     }
1547d24d4204SJose E. Roman     space = space_next;
1548d24d4204SJose E. Roman   }
1549d24d4204SJose E. Roman 
1550d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
15519566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
1552d24d4204SJose E. Roman   for (i = 0, nsends = 0; i < size; i++) {
1553d24d4204SJose E. Roman     if (nlengths[i]) {
15549371c9d4SSatish Balay       sizes[i] = 1;
15559371c9d4SSatish Balay       nsends++;
1556d24d4204SJose E. Roman     }
1557d24d4204SJose E. Roman   }
1558d24d4204SJose E. Roman 
15599371c9d4SSatish Balay   {
15609371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
1561d24d4204SJose E. Roman     /* Determine the number of messages to expect, their lengths, from from-ids */
15629566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
15639566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1564d24d4204SJose E. Roman     /* since clubbing row,col - lengths are multiplied by 2 */
1565d24d4204SJose E. Roman     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
15669566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1567d24d4204SJose E. Roman     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1568d24d4204SJose E. Roman     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
15699566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
15709566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
15719371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
15729371c9d4SSatish Balay   }
1573d24d4204SJose E. Roman 
1574d24d4204SJose E. Roman   /* do sends:
1575d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1576d24d4204SJose E. Roman          the ith processor
1577d24d4204SJose E. Roman   */
15789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
15799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
15809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1581d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
15829371c9d4SSatish Balay   startv[0] = 0;
15839371c9d4SSatish Balay   starti[0] = 0;
1584d24d4204SJose E. Roman   for (i = 1; i < size; i++) {
1585d24d4204SJose E. Roman     startv[i] = startv[i - 1] + nlengths[i - 1];
1586d24d4204SJose E. Roman     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1587d24d4204SJose E. Roman   }
1588d24d4204SJose E. Roman 
1589d24d4204SJose E. Roman   i     = 0;
1590d24d4204SJose E. Roman   space = stash->space_head;
1591d24d4204SJose E. Roman   while (space) {
1592d24d4204SJose E. Roman     space_next = space->next;
1593d24d4204SJose E. Roman     sp_idx     = space->idx;
1594d24d4204SJose E. Roman     sp_idy     = space->idy;
1595d24d4204SJose E. Roman     sp_val     = space->val;
1596d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
1597d24d4204SJose E. Roman       j = owner[i];
1598d24d4204SJose E. Roman       if (bs2 == 1) {
1599d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1600d24d4204SJose E. Roman       } else {
1601d24d4204SJose E. Roman         PetscInt     k;
1602d24d4204SJose E. Roman         PetscScalar *buf1, *buf2;
1603d24d4204SJose E. Roman         buf1 = svalues + bs2 * startv[j];
1604d24d4204SJose E. Roman         buf2 = space->val + bs2 * l;
1605d24d4204SJose E. Roman         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1606d24d4204SJose E. Roman       }
1607d24d4204SJose E. Roman       sindices[starti[j]]               = sp_idx[l];
1608d24d4204SJose E. Roman       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1609d24d4204SJose E. Roman       startv[j]++;
1610d24d4204SJose E. Roman       starti[j]++;
1611d24d4204SJose E. Roman       i++;
1612d24d4204SJose E. Roman     }
1613d24d4204SJose E. Roman     space = space_next;
1614d24d4204SJose E. Roman   }
1615d24d4204SJose E. Roman   startv[0] = 0;
1616d24d4204SJose E. Roman   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1617d24d4204SJose E. Roman 
1618d24d4204SJose E. Roman   for (i = 0, count = 0; i < size; i++) {
1619d24d4204SJose E. Roman     if (sizes[i]) {
16209566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
16219566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1622d24d4204SJose E. Roman     }
1623d24d4204SJose E. Roman   }
1624d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
16259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1626d24d4204SJose E. Roman   for (i = 0; i < size; i++) {
162748a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1628d24d4204SJose E. Roman   }
1629d24d4204SJose E. Roman #endif
16309566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
16319566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
16329566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
16339566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
1634d24d4204SJose E. Roman 
1635d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
16369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1637d24d4204SJose E. Roman 
1638d24d4204SJose E. Roman   for (i = 0; i < nreceives; i++) {
1639d24d4204SJose E. Roman     recv_waits[2 * i]     = recv_waits1[i];
1640d24d4204SJose E. Roman     recv_waits[2 * i + 1] = recv_waits2[i];
1641d24d4204SJose E. Roman   }
1642d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1643d24d4204SJose E. Roman 
16449566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
16459566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
1646d24d4204SJose E. Roman 
1647d24d4204SJose E. Roman   stash->svalues         = svalues;
1648d24d4204SJose E. Roman   stash->sindices        = sindices;
1649d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1650d24d4204SJose E. Roman   stash->rindices        = rindices;
1651d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1652d24d4204SJose E. Roman   stash->nsends          = nsends;
1653d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1654d24d4204SJose E. Roman   stash->reproduce_count = 0;
16553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1656d24d4204SJose E. Roman }
1657d24d4204SJose E. Roman 
1658d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1659d71ae5a4SJacob Faibussowitsch {
1660d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1661d24d4204SJose E. Roman 
1662d24d4204SJose E. Roman   PetscFunctionBegin;
166328b400f6SJacob Faibussowitsch   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1664aed4548fSBarry Smith   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1665aed4548fSBarry Smith   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
16669566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
16679566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
16683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1669d24d4204SJose E. Roman }
1670d24d4204SJose E. Roman 
1671d24d4204SJose E. Roman /*@
16726aad120cSJose E. Roman   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
167311a5261eSBarry Smith   the `MATSCALAPACK` matrix
1674d24d4204SJose E. Roman 
1675c3339decSBarry Smith   Logically Collective
1676d24d4204SJose E. Roman 
1677d8d19677SJose E. Roman   Input Parameters:
167811a5261eSBarry Smith + A  - a `MATSCALAPACK` matrix
1679d24d4204SJose E. Roman . mb - the row block size
1680d24d4204SJose E. Roman - nb - the column block size
1681d24d4204SJose E. Roman 
1682d24d4204SJose E. Roman   Level: intermediate
1683d24d4204SJose E. Roman 
16842ef1f0ffSBarry Smith   Note:
16852ef1f0ffSBarry Smith   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
16862ef1f0ffSBarry Smith 
16871cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1688d24d4204SJose E. Roman @*/
1689d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1690d71ae5a4SJacob Faibussowitsch {
1691d24d4204SJose E. Roman   PetscFunctionBegin;
1692d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1693d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, mb, 2);
1694d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, nb, 3);
1695cac4c232SBarry Smith   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
16963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1697d24d4204SJose E. Roman }
1698d24d4204SJose E. Roman 
1699d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1700d71ae5a4SJacob Faibussowitsch {
1701d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1702d24d4204SJose E. Roman 
1703d24d4204SJose E. Roman   PetscFunctionBegin;
1704d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1705d24d4204SJose E. Roman   if (nb) *nb = a->nb;
17063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1707d24d4204SJose E. Roman }
1708d24d4204SJose E. Roman 
1709d24d4204SJose E. Roman /*@
17106aad120cSJose E. Roman   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
171111a5261eSBarry Smith   the `MATSCALAPACK` matrix
1712d24d4204SJose E. Roman 
17132ef1f0ffSBarry Smith   Not Collective
1714d24d4204SJose E. Roman 
1715d24d4204SJose E. Roman   Input Parameter:
171611a5261eSBarry Smith . A - a `MATSCALAPACK` matrix
1717d24d4204SJose E. Roman 
1718d24d4204SJose E. Roman   Output Parameters:
1719d24d4204SJose E. Roman + mb - the row block size
1720d24d4204SJose E. Roman - nb - the column block size
1721d24d4204SJose E. Roman 
1722d24d4204SJose E. Roman   Level: intermediate
1723d24d4204SJose E. Roman 
17242ef1f0ffSBarry Smith   Note:
17252ef1f0ffSBarry Smith   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
17262ef1f0ffSBarry Smith 
17271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1728d24d4204SJose E. Roman @*/
1729d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1730d71ae5a4SJacob Faibussowitsch {
1731d24d4204SJose E. Roman   PetscFunctionBegin;
1732d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1733cac4c232SBarry Smith   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
17343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1735d24d4204SJose E. Roman }
1736d24d4204SJose E. Roman 
1737d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1738d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1739d24d4204SJose E. Roman 
1740d24d4204SJose E. Roman /*MC
1741d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1742d24d4204SJose E. Roman 
17432ef1f0ffSBarry Smith    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1744d24d4204SJose E. Roman 
1745d24d4204SJose E. Roman    Options Database Keys:
17462ef1f0ffSBarry Smith +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
17472ef1f0ffSBarry Smith .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1748d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1749d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1750d24d4204SJose E. Roman 
17512ef1f0ffSBarry Smith    Level: intermediate
17522ef1f0ffSBarry Smith 
175389bba20eSBarry Smith   Note:
175489bba20eSBarry Smith    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
175589bba20eSBarry Smith    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
175689bba20eSBarry Smith    the given rank.
175789bba20eSBarry Smith 
17581cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1759d24d4204SJose E. Roman M*/
1760d24d4204SJose E. Roman 
1761d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1762d71ae5a4SJacob Faibussowitsch {
1763d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1764d24d4204SJose E. Roman   PetscBool           flg, flg1;
1765d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1766d24d4204SJose E. Roman   MPI_Comm            icomm;
1767d24d4204SJose E. Roman   PetscBLASInt        nprow, npcol, myrow, mycol;
1768d24d4204SJose E. Roman   PetscInt            optv1, k = 2, array[2] = {0, 0};
1769d24d4204SJose E. Roman   PetscMPIInt         size;
1770d24d4204SJose E. Roman 
1771d24d4204SJose E. Roman   PetscFunctionBegin;
1772aea10558SJacob Faibussowitsch   A->ops[0]     = MatOps_Values;
1773d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1774d24d4204SJose E. Roman 
17759566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1776d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1777d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1778d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1779d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1780d24d4204SJose E. Roman 
17814dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&a));
1782d24d4204SJose E. Roman   A->data = (void *)a;
1783d24d4204SJose E. Roman 
1784d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1785d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
17869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
17879566063dSJacob Faibussowitsch     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
17889566063dSJacob Faibussowitsch     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1789d24d4204SJose E. Roman   }
17909566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
17919566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1792d24d4204SJose E. Roman   if (!flg) {
17934dfa11a4SJacob Faibussowitsch     PetscCall(PetscNew(&grid));
1794d24d4204SJose E. Roman 
17959566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(icomm, &size));
1796d24d4204SJose E. Roman     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1797d24d4204SJose E. Roman 
1798d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
17999566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1800d24d4204SJose E. Roman     if (flg1) {
180108401ef6SPierre Jolivet       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1802d24d4204SJose E. Roman       grid->nprow = optv1;
1803d24d4204SJose E. Roman     }
1804d0609cedSBarry Smith     PetscOptionsEnd();
1805d24d4204SJose E. Roman 
1806d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1807d24d4204SJose E. Roman     grid->npcol = size / grid->nprow;
18089566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
18099566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1810f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1811d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1812d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1813d24d4204SJose E. Roman     grid->grid_refct = 1;
1814d24d4204SJose E. Roman     grid->nprow      = nprow;
1815d24d4204SJose E. Roman     grid->npcol      = npcol;
1816d24d4204SJose E. Roman     grid->myrow      = myrow;
1817d24d4204SJose E. Roman     grid->mycol      = mycol;
1818d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1819f7ec113fSDamian Marek     grid->ictxrow = Csys2blacs_handle(icomm);
1820d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1821f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1822d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
18239566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1824d24d4204SJose E. Roman 
1825d24d4204SJose E. Roman   } else grid->grid_refct++;
18269566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
1827d24d4204SJose E. Roman   a->grid = grid;
1828d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1829d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1830d24d4204SJose E. Roman 
1831d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
18329566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1833d24d4204SJose E. Roman   if (flg) {
1834d24d4204SJose E. Roman     a->mb = array[0];
1835d24d4204SJose E. Roman     a->nb = (k > 1) ? array[1] : a->mb;
1836d24d4204SJose E. Roman   }
1837d0609cedSBarry Smith   PetscOptionsEnd();
1838d24d4204SJose E. Roman 
1839b12397e7SPierre Jolivet   a->roworiented = PETSC_TRUE;
18409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
18419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
18429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
18439566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
18443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1845d24d4204SJose E. Roman }
1846d24d4204SJose E. Roman 
1847d24d4204SJose E. Roman /*@C
1848d24d4204SJose E. Roman   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
184911a5261eSBarry Smith   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1850d24d4204SJose E. Roman 
1851d24d4204SJose E. Roman   Collective
1852d24d4204SJose E. Roman 
1853d24d4204SJose E. Roman   Input Parameters:
1854d24d4204SJose E. Roman + comm - MPI communicator
185511a5261eSBarry Smith . mb   - row block size (or `PETSC_DECIDE` to have it set)
185611a5261eSBarry Smith . nb   - column block size (or `PETSC_DECIDE` to have it set)
1857d24d4204SJose E. Roman . M    - number of global rows
1858d24d4204SJose E. Roman . N    - number of global columns
1859d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix
1860d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix
1861d24d4204SJose E. Roman 
1862d24d4204SJose E. Roman   Output Parameter:
1863d24d4204SJose E. Roman . A - the matrix
1864d24d4204SJose E. Roman 
186511a5261eSBarry Smith   Options Database Key:
1866d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1867d24d4204SJose E. Roman 
18682ef1f0ffSBarry Smith   Level: intermediate
18692ef1f0ffSBarry Smith 
18702ef1f0ffSBarry Smith   Notes:
18712ef1f0ffSBarry Smith   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
18722ef1f0ffSBarry Smith 
187311a5261eSBarry Smith   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1874d24d4204SJose E. Roman   MatXXXXSetPreallocation() paradigm instead of this routine directly.
187511a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1876d24d4204SJose E. Roman 
1877d24d4204SJose E. Roman   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1878d24d4204SJose E. Roman   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1879d24d4204SJose E. Roman   significance and are thus ignored. The block sizes refer to the values
188011a5261eSBarry Smith   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1881d24d4204SJose E. Roman 
18821cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1883d24d4204SJose E. Roman @*/
1884d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1885d71ae5a4SJacob Faibussowitsch {
1886d24d4204SJose E. Roman   Mat_ScaLAPACK *a;
1887d24d4204SJose E. Roman   PetscInt       m, n;
1888d24d4204SJose E. Roman 
1889d24d4204SJose E. Roman   PetscFunctionBegin;
18909566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
18919566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSCALAPACK));
1892aed4548fSBarry Smith   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1893d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1894d24d4204SJose E. Roman   m = PETSC_DECIDE;
18959566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1896d24d4204SJose E. Roman   n = PETSC_DECIDE;
18979566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
18989566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1899d24d4204SJose E. Roman   a = (Mat_ScaLAPACK *)(*A)->data;
19009566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(M, &a->M));
19019566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(N, &a->N));
19029566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
19039566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
19049566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
19059566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
19069566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*A));
19073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1908d24d4204SJose E. Roman }
1909