xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision c87776d362bbdcf41177f47d22f77862065f4f4d)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
327e75052SPierre Jolivet const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet                                        "       PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet                                        "       ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet                                        "       YEAR = 1997\n"
1127e75052SPierre Jolivet                                        "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite       = PETSC_FALSE;
1327e75052SPierre Jolivet 
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman 
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman 
22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23d71ae5a4SJacob Faibussowitsch {
24f7ec113fSDamian Marek   PetscFunctionBegin;
259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
269566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28f7ec113fSDamian Marek }
29f7ec113fSDamian Marek 
30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31d71ae5a4SJacob Faibussowitsch {
32d24d4204SJose E. Roman   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33d24d4204SJose E. Roman   PetscBool         iascii;
34d24d4204SJose E. Roman   PetscViewerFormat format;
35d24d4204SJose E. Roman   Mat               Adense;
36d24d4204SJose E. Roman 
37d24d4204SJose E. Roman   PetscFunctionBegin;
389566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39d24d4204SJose E. Roman   if (iascii) {
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
41d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
429566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
439566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
449566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
459566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
463ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
47d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
483ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
49d24d4204SJose E. Roman     }
50d24d4204SJose E. Roman   }
51d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
529566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
539566063dSJacob Faibussowitsch   PetscCall(MatView(Adense, viewer));
549566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56d24d4204SJose E. Roman }
57d24d4204SJose E. Roman 
58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59d71ae5a4SJacob Faibussowitsch {
60d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61d24d4204SJose E. Roman   PetscLogDouble isend[2], irecv[2];
62d24d4204SJose E. Roman 
63d24d4204SJose E. Roman   PetscFunctionBegin;
64d24d4204SJose E. Roman   info->block_size = 1.0;
65d24d4204SJose E. Roman 
66d24d4204SJose E. Roman   isend[0] = a->lld * a->locc;  /* locally allocated */
67d24d4204SJose E. Roman   isend[1] = a->locr * a->locc; /* used submatrix */
68d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69d24d4204SJose E. Roman     info->nz_allocated = isend[0];
70d24d4204SJose E. Roman     info->nz_used      = isend[1];
71d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
7257168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
74d24d4204SJose E. Roman     info->nz_used      = irecv[1];
75d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
7657168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
78d24d4204SJose E. Roman     info->nz_used      = irecv[1];
79d24d4204SJose E. Roman   }
80d24d4204SJose E. Roman 
81d24d4204SJose E. Roman   info->nz_unneeded       = 0;
82d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
83d24d4204SJose E. Roman   info->mallocs           = 0;
844dfa11a4SJacob Faibussowitsch   info->memory            = 0; /* REVIEW ME */
85d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
86d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
87d24d4204SJose E. Roman   info->factor_mallocs    = 0;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89d24d4204SJose E. Roman }
90d24d4204SJose E. Roman 
9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92d71ae5a4SJacob Faibussowitsch {
93b12397e7SPierre Jolivet   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94b12397e7SPierre Jolivet 
95d24d4204SJose E. Roman   PetscFunctionBegin;
96d24d4204SJose E. Roman   switch (op) {
97d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATIONS:
98d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATION_ERR:
99d24d4204SJose E. Roman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100d24d4204SJose E. Roman   case MAT_SYMMETRIC:
101d24d4204SJose E. Roman   case MAT_SORTED_FULL:
102d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
103d71ae5a4SJacob Faibussowitsch     break;
104d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
105d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
106d71ae5a4SJacob Faibussowitsch     break;
107d71ae5a4SJacob Faibussowitsch   default:
108d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109d24d4204SJose E. Roman   }
1103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
111d24d4204SJose E. Roman }
112d24d4204SJose E. Roman 
113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114d71ae5a4SJacob Faibussowitsch {
115d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116d24d4204SJose E. Roman   PetscInt       i, j;
117d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118b12397e7SPierre Jolivet   PetscBool      roworiented = a->roworiented;
119d24d4204SJose E. Roman 
120d24d4204SJose E. Roman   PetscFunctionBegin;
121b12397e7SPierre Jolivet   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122d24d4204SJose E. Roman   for (i = 0; i < nr; i++) {
123d24d4204SJose E. Roman     if (rows[i] < 0) continue;
1249566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125d24d4204SJose E. Roman     for (j = 0; j < nc; j++) {
126d24d4204SJose E. Roman       if (cols[j] < 0) continue;
1279566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129d24d4204SJose E. Roman       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130b12397e7SPierre Jolivet         if (roworiented) {
131d24d4204SJose E. Roman           switch (imode) {
132d71ae5a4SJacob Faibussowitsch           case INSERT_VALUES:
133d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134d71ae5a4SJacob Faibussowitsch             break;
135d71ae5a4SJacob Faibussowitsch           default:
136d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137d71ae5a4SJacob Faibussowitsch             break;
138b12397e7SPierre Jolivet           }
139b12397e7SPierre Jolivet         } else {
140b12397e7SPierre Jolivet           switch (imode) {
141d71ae5a4SJacob Faibussowitsch           case INSERT_VALUES:
142d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143d71ae5a4SJacob Faibussowitsch             break;
144d71ae5a4SJacob Faibussowitsch           default:
145d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146d71ae5a4SJacob Faibussowitsch             break;
147b12397e7SPierre Jolivet           }
148d24d4204SJose E. Roman         }
149d24d4204SJose E. Roman       } else {
15028b400f6SJacob Faibussowitsch         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
152b12397e7SPierre Jolivet         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153d24d4204SJose E. Roman       }
154d24d4204SJose E. Roman     }
155d24d4204SJose E. Roman   }
1563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
157d24d4204SJose E. Roman }
158d24d4204SJose E. Roman 
159d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160d71ae5a4SJacob Faibussowitsch {
161d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162d24d4204SJose E. Roman   PetscScalar    *x2d, *y2d, alpha = 1.0;
163d24d4204SJose E. Roman   const PetscInt *ranges;
164d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165d24d4204SJose E. Roman 
166d24d4204SJose E. Roman   PetscFunctionBegin;
167d24d4204SJose E. Roman   if (transpose) {
168d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1699566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1709566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171d24d4204SJose E. Roman     xlld = PetscMax(1, A->rmap->n);
172792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1749566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
1759566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176d24d4204SJose E. Roman     ylld = 1;
177792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
179d24d4204SJose E. Roman 
180d24d4204SJose E. Roman     /* allocate 2d vectors */
181d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184d24d4204SJose E. Roman     xlld = PetscMax(1, lszx);
185d24d4204SJose E. Roman 
186d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
189792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
191d24d4204SJose E. Roman 
192d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
193*c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194d24d4204SJose E. Roman 
195d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
196792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197d24d4204SJose E. Roman 
198d24d4204SJose E. Roman     /* call PBLAS subroutine */
199792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200d24d4204SJose E. Roman 
201d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
202792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
203d24d4204SJose E. Roman 
204d24d4204SJose E. Roman   } else { /* non-transpose */
205d24d4204SJose E. Roman 
206d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
2079566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
2089566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
209d24d4204SJose E. Roman     xlld = 1;
210792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
211d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
2129566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
2139566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
214d24d4204SJose E. Roman     ylld = PetscMax(1, A->rmap->n);
215792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
216d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
217d24d4204SJose E. Roman 
218d24d4204SJose E. Roman     /* allocate 2d vectors */
219d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
220d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
2219566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
222d24d4204SJose E. Roman     ylld = PetscMax(1, lszy);
223d24d4204SJose E. Roman 
224d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
225792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
226d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
227792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
228d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
229d24d4204SJose E. Roman 
230d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
231*c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
232d24d4204SJose E. Roman 
233d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
234792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
235d24d4204SJose E. Roman 
236d24d4204SJose E. Roman     /* call PBLAS subroutine */
237792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
238d24d4204SJose E. Roman 
239d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
240792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
241d24d4204SJose E. Roman   }
2429566063dSJacob Faibussowitsch   PetscCall(PetscFree2(x2d, y2d));
2433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
244d24d4204SJose E. Roman }
245d24d4204SJose E. Roman 
246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
247d71ae5a4SJacob Faibussowitsch {
248d24d4204SJose E. Roman   const PetscScalar *xarray;
249d24d4204SJose E. Roman   PetscScalar       *yarray;
250d24d4204SJose E. Roman 
251d24d4204SJose E. Roman   PetscFunctionBegin;
2529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
2549566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
2559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
2573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
258d24d4204SJose E. Roman }
259d24d4204SJose E. Roman 
260d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
261d71ae5a4SJacob Faibussowitsch {
262d24d4204SJose E. Roman   const PetscScalar *xarray;
263d24d4204SJose E. Roman   PetscScalar       *yarray;
264d24d4204SJose E. Roman 
265d24d4204SJose E. Roman   PetscFunctionBegin;
2669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
2689566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
2699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
2713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
272d24d4204SJose E. Roman }
273d24d4204SJose E. Roman 
274d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
275d71ae5a4SJacob Faibussowitsch {
276d24d4204SJose E. Roman   const PetscScalar *xarray;
277d24d4204SJose E. Roman   PetscScalar       *zarray;
278d24d4204SJose E. Roman 
279d24d4204SJose E. Roman   PetscFunctionBegin;
2809566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
2819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2829566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
2839566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
2849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
2863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
287d24d4204SJose E. Roman }
288d24d4204SJose E. Roman 
289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290d71ae5a4SJacob Faibussowitsch {
291d24d4204SJose E. Roman   const PetscScalar *xarray;
292d24d4204SJose E. Roman   PetscScalar       *zarray;
293d24d4204SJose E. Roman 
294d24d4204SJose E. Roman   PetscFunctionBegin;
2959566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
2969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
2989566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
2999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
3009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
3013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
302d24d4204SJose E. Roman }
303d24d4204SJose E. Roman 
304d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
305d71ae5a4SJacob Faibussowitsch {
306d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
307d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
308d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
309d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
310d24d4204SJose E. Roman   PetscBLASInt   one = 1;
311d24d4204SJose E. Roman 
312d24d4204SJose E. Roman   PetscFunctionBegin;
313792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
314d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
3153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
316d24d4204SJose E. Roman }
317d24d4204SJose E. Roman 
318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
319d71ae5a4SJacob Faibussowitsch {
320d24d4204SJose E. Roman   PetscFunctionBegin;
3219566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3229566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
3239566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
324d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
3253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
326d24d4204SJose E. Roman }
327d24d4204SJose E. Roman 
328d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
329d71ae5a4SJacob Faibussowitsch {
330d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
331d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
332d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
333d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
334d24d4204SJose E. Roman   PetscBLASInt   one = 1;
335d24d4204SJose E. Roman 
336d24d4204SJose E. Roman   PetscFunctionBegin;
337792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
338d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
3393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
340d24d4204SJose E. Roman }
341d24d4204SJose E. Roman 
342d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
343d71ae5a4SJacob Faibussowitsch {
344d24d4204SJose E. Roman   PetscFunctionBegin;
3459566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
3469566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
3479566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
3483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
349d24d4204SJose E. Roman }
350d24d4204SJose E. Roman 
351d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
352d71ae5a4SJacob Faibussowitsch {
353d24d4204SJose E. Roman   PetscFunctionBegin;
354d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
355d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
3563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
357d24d4204SJose E. Roman }
358d24d4204SJose E. Roman 
359d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
360d71ae5a4SJacob Faibussowitsch {
361d24d4204SJose E. Roman   PetscFunctionBegin;
362d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
363d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
365d24d4204SJose E. Roman }
366d24d4204SJose E. Roman 
367d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
368d71ae5a4SJacob Faibussowitsch {
369d24d4204SJose E. Roman   Mat_Product *product = C->product;
370d24d4204SJose E. Roman 
371d24d4204SJose E. Roman   PetscFunctionBegin;
372d24d4204SJose E. Roman   switch (product->type) {
373d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
374d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
375d71ae5a4SJacob Faibussowitsch     break;
376d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
377d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
378d71ae5a4SJacob Faibussowitsch     break;
379d71ae5a4SJacob Faibussowitsch   default:
380d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
381d24d4204SJose E. Roman   }
3823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
383d24d4204SJose E. Roman }
384d24d4204SJose E. Roman 
385d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
386d71ae5a4SJacob Faibussowitsch {
387d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
388d24d4204SJose E. Roman   PetscScalar    *darray, *d2d, v;
389d24d4204SJose E. Roman   const PetscInt *ranges;
390d24d4204SJose E. Roman   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
391d24d4204SJose E. Roman 
392d24d4204SJose E. Roman   PetscFunctionBegin;
3939566063dSJacob Faibussowitsch   PetscCall(VecGetArray(D, &darray));
394d24d4204SJose E. Roman 
395d24d4204SJose E. Roman   if (A->rmap->N <= A->cmap->N) { /* row version */
396d24d4204SJose E. Roman 
397d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
3989566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
3999566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
400d24d4204SJose E. Roman     dlld = PetscMax(1, A->rmap->n);
401792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
402d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
403d24d4204SJose E. Roman 
404d24d4204SJose E. Roman     /* allocate 2d vector */
405d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
4069566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
407d24d4204SJose E. Roman     dlld = PetscMax(1, lszd);
408d24d4204SJose E. Roman 
409d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
410792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
411d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
412d24d4204SJose E. Roman 
413d24d4204SJose E. Roman     /* collect diagonal */
414d24d4204SJose E. Roman     for (j = 1; j <= a->M; j++) {
415792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
416792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
417d24d4204SJose E. Roman     }
418d24d4204SJose E. Roman 
419d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
420792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
4219566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
422d24d4204SJose E. Roman 
423d24d4204SJose E. Roman   } else { /* column version */
424d24d4204SJose E. Roman 
425d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4269566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
4279566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
428d24d4204SJose E. Roman     dlld = 1;
429792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
430d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
431d24d4204SJose E. Roman 
432d24d4204SJose E. Roman     /* allocate 2d vector */
433d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4349566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
435d24d4204SJose E. Roman 
436d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
437792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
438d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
439d24d4204SJose E. Roman 
440d24d4204SJose E. Roman     /* collect diagonal */
441d24d4204SJose E. Roman     for (j = 1; j <= a->N; j++) {
442792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
443792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
444d24d4204SJose E. Roman     }
445d24d4204SJose E. Roman 
446d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
447792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
4489566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
449d24d4204SJose E. Roman   }
450d24d4204SJose E. Roman 
4519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(D, &darray));
4529566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(D));
4539566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(D));
4543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
455d24d4204SJose E. Roman }
456d24d4204SJose E. Roman 
457d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
458d71ae5a4SJacob Faibussowitsch {
459d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
460d24d4204SJose E. Roman   const PetscScalar *d;
461d24d4204SJose E. Roman   const PetscInt    *ranges;
462d24d4204SJose E. Roman   PetscScalar       *d2d;
463d24d4204SJose E. Roman   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
464d24d4204SJose E. Roman 
465d24d4204SJose E. Roman   PetscFunctionBegin;
466d24d4204SJose E. Roman   if (R) {
4679566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
468d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4699566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
4709566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
471d24d4204SJose E. Roman     dlld = 1;
472792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
473d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
474d24d4204SJose E. Roman 
475d24d4204SJose E. Roman     /* allocate 2d vector */
476d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4779566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
478d24d4204SJose E. Roman 
479d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
480792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
481d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
482d24d4204SJose E. Roman 
483d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
484*c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
485d24d4204SJose E. Roman 
486d24d4204SJose E. Roman     /* broadcast along process columns */
487d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
488d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
489d24d4204SJose E. Roman 
490d24d4204SJose E. Roman     /* local scaling */
4919371c9d4SSatish Balay     for (j = 0; j < a->locc; j++)
4929371c9d4SSatish Balay       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
493d24d4204SJose E. Roman 
4949566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
4959566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
496d24d4204SJose E. Roman   }
497d24d4204SJose E. Roman   if (L) {
4989566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
499d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
5009566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
5019566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
502d24d4204SJose E. Roman     dlld = PetscMax(1, A->rmap->n);
503792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
504d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
505d24d4204SJose E. Roman 
506d24d4204SJose E. Roman     /* allocate 2d vector */
507d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
5089566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
509d24d4204SJose E. Roman     dlld = PetscMax(1, lszd);
510d24d4204SJose E. Roman 
511d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
512792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
513d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
514d24d4204SJose E. Roman 
515d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
516*c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
517d24d4204SJose E. Roman 
518d24d4204SJose E. Roman     /* broadcast along process rows */
519d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
520d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
521d24d4204SJose E. Roman 
522d24d4204SJose E. Roman     /* local scaling */
5239371c9d4SSatish Balay     for (i = 0; i < a->locr; i++)
5249371c9d4SSatish Balay       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
525d24d4204SJose E. Roman 
5269566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
5279566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
528d24d4204SJose E. Roman   }
5293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
530d24d4204SJose E. Roman }
531d24d4204SJose E. Roman 
532d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
533d71ae5a4SJacob Faibussowitsch {
534d24d4204SJose E. Roman   PetscFunctionBegin;
535d24d4204SJose E. Roman   *missing = PETSC_FALSE;
5363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
537d24d4204SJose E. Roman }
538d24d4204SJose E. Roman 
539d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
540d71ae5a4SJacob Faibussowitsch {
541d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
542d24d4204SJose E. Roman   PetscBLASInt   n, one = 1;
543d24d4204SJose E. Roman 
544d24d4204SJose E. Roman   PetscFunctionBegin;
545d24d4204SJose E. Roman   n = x->lld * x->locc;
546792fecdfSBarry Smith   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
5473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
548d24d4204SJose E. Roman }
549d24d4204SJose E. Roman 
550d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
551d71ae5a4SJacob Faibussowitsch {
552d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
553d24d4204SJose E. Roman   PetscBLASInt   i, n;
554d24d4204SJose E. Roman   PetscScalar    v;
555d24d4204SJose E. Roman 
556d24d4204SJose E. Roman   PetscFunctionBegin;
557d24d4204SJose E. Roman   n = PetscMin(x->M, x->N);
558d24d4204SJose E. Roman   for (i = 1; i <= n; i++) {
559792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
560d24d4204SJose E. Roman     v += alpha;
561792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
562d24d4204SJose E. Roman   }
5633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
564d24d4204SJose E. Roman }
565d24d4204SJose E. Roman 
566d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
567d71ae5a4SJacob Faibussowitsch {
568d24d4204SJose E. Roman   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
569d24d4204SJose E. Roman   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
570d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
571d24d4204SJose E. Roman   PetscScalar    beta = 1.0;
572d24d4204SJose E. Roman 
573d24d4204SJose E. Roman   PetscFunctionBegin;
574d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
575792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
5769566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
5773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
578d24d4204SJose E. Roman }
579d24d4204SJose E. Roman 
580d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
581d71ae5a4SJacob Faibussowitsch {
582d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
583d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
584d24d4204SJose E. Roman 
585d24d4204SJose E. Roman   PetscFunctionBegin;
5869566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
5879566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
5883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
589d24d4204SJose E. Roman }
590d24d4204SJose E. Roman 
591d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
592d71ae5a4SJacob Faibussowitsch {
593d24d4204SJose E. Roman   Mat            Bs;
594d24d4204SJose E. Roman   MPI_Comm       comm;
595d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
596d24d4204SJose E. Roman 
597d24d4204SJose E. Roman   PetscFunctionBegin;
5989566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5999566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Bs));
6009566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
6019566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bs, MATSCALAPACK));
602d24d4204SJose E. Roman   b       = (Mat_ScaLAPACK *)Bs->data;
603d24d4204SJose E. Roman   b->M    = a->M;
604d24d4204SJose E. Roman   b->N    = a->N;
605d24d4204SJose E. Roman   b->mb   = a->mb;
606d24d4204SJose E. Roman   b->nb   = a->nb;
607d24d4204SJose E. Roman   b->rsrc = a->rsrc;
608d24d4204SJose E. Roman   b->csrc = a->csrc;
6099566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Bs));
610d24d4204SJose E. Roman   *B = Bs;
61148a46eb9SPierre Jolivet   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
612d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
614d24d4204SJose E. Roman }
615d24d4204SJose E. Roman 
616d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
617d71ae5a4SJacob Faibussowitsch {
618d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
619d24d4204SJose E. Roman   Mat            Bs   = *B;
620d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
621d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
622d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
623d24d4204SJose E. Roman   PetscInt i, j;
624d24d4204SJose E. Roman #endif
625d24d4204SJose E. Roman 
626d24d4204SJose E. Roman   PetscFunctionBegin;
6277fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
6280fdf79fbSJacob Faibussowitsch   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
6299566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
630d24d4204SJose E. Roman   *B = Bs;
631d24d4204SJose E. Roman   b  = (Mat_ScaLAPACK *)Bs->data;
632792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
633d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
634d24d4204SJose E. Roman   /* undo conjugation */
6359371c9d4SSatish Balay   for (i = 0; i < b->locr; i++)
6369371c9d4SSatish Balay     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
637d24d4204SJose E. Roman #endif
638d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
640d24d4204SJose E. Roman }
641d24d4204SJose E. Roman 
642d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
643d71ae5a4SJacob Faibussowitsch {
644d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
645d24d4204SJose E. Roman   PetscInt       i, j;
646d24d4204SJose E. Roman 
647d24d4204SJose E. Roman   PetscFunctionBegin;
6489371c9d4SSatish Balay   for (i = 0; i < a->locr; i++)
6499371c9d4SSatish Balay     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
6503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
651d24d4204SJose E. Roman }
652d24d4204SJose E. Roman 
653d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
654d71ae5a4SJacob Faibussowitsch {
655d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
656d24d4204SJose E. Roman   Mat            Bs   = *B;
657d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
658d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
659d24d4204SJose E. Roman 
660d24d4204SJose E. Roman   PetscFunctionBegin;
6610fdf79fbSJacob Faibussowitsch   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
6629566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
663d24d4204SJose E. Roman   *B = Bs;
664d24d4204SJose E. Roman   b  = (Mat_ScaLAPACK *)Bs->data;
665792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
666d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
668d24d4204SJose E. Roman }
669d24d4204SJose E. Roman 
670d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
671d71ae5a4SJacob Faibussowitsch {
672d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
673d24d4204SJose E. Roman   PetscScalar    *x, *x2d;
674d24d4204SJose E. Roman   const PetscInt *ranges;
675d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
676d24d4204SJose E. Roman 
677d24d4204SJose E. Roman   PetscFunctionBegin;
6789566063dSJacob Faibussowitsch   PetscCall(VecCopy(B, X));
6799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(X, &x));
680d24d4204SJose E. Roman 
681d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
6829566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
6839566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
684d24d4204SJose E. Roman   xlld = PetscMax(1, A->rmap->n);
685792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
686d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
687d24d4204SJose E. Roman 
688d24d4204SJose E. Roman   /* allocate 2d vector */
689d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
6909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(lszx, &x2d));
691d24d4204SJose E. Roman   xlld = PetscMax(1, lszx);
692d24d4204SJose E. Roman 
693d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
694792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
695d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
696d24d4204SJose E. Roman 
697d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
698792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
699d24d4204SJose E. Roman 
700d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
701d24d4204SJose E. Roman   switch (A->factortype) {
702d24d4204SJose E. Roman   case MAT_FACTOR_LU:
703792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
704d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
705d24d4204SJose E. Roman     break;
706d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
707792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
708d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
709d24d4204SJose E. Roman     break;
710d71ae5a4SJacob Faibussowitsch   default:
711d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
712d24d4204SJose E. Roman   }
713d24d4204SJose E. Roman 
714d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
715792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
716d24d4204SJose E. Roman 
7179566063dSJacob Faibussowitsch   PetscCall(PetscFree(x2d));
7189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(X, &x));
7193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
720d24d4204SJose E. Roman }
721d24d4204SJose E. Roman 
722d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
723d71ae5a4SJacob Faibussowitsch {
724d24d4204SJose E. Roman   PetscFunctionBegin;
7259566063dSJacob Faibussowitsch   PetscCall(MatSolve_ScaLAPACK(A, B, X));
7269566063dSJacob Faibussowitsch   PetscCall(VecAXPY(X, 1, Y));
7273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
728d24d4204SJose E. Roman }
729d24d4204SJose E. Roman 
730d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
731d71ae5a4SJacob Faibussowitsch {
732d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
733d24d4204SJose E. Roman   PetscBool      flg1, flg2;
734d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
735d24d4204SJose E. Roman 
736d24d4204SJose E. Roman   PetscFunctionBegin;
7379566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
7389566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
73908401ef6SPierre Jolivet   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
740d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B, 1, X, 2);
741d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)B->data;
742d24d4204SJose E. Roman   x = (Mat_ScaLAPACK *)X->data;
7439566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
744d24d4204SJose E. Roman 
745d24d4204SJose E. Roman   switch (A->factortype) {
746d24d4204SJose E. Roman   case MAT_FACTOR_LU:
747792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
748d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
749d24d4204SJose E. Roman     break;
750d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
751792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
752d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
753d24d4204SJose E. Roman     break;
754d71ae5a4SJacob Faibussowitsch   default:
755d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
756d24d4204SJose E. Roman   }
7573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
758d24d4204SJose E. Roman }
759d24d4204SJose E. Roman 
760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
761d71ae5a4SJacob Faibussowitsch {
762d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
763d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
764d24d4204SJose E. Roman 
765d24d4204SJose E. Roman   PetscFunctionBegin;
766aa624791SPierre Jolivet   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
767792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
768d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf", info);
769d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
770d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
771d24d4204SJose E. Roman 
7729566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
7739566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
7743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
775d24d4204SJose E. Roman }
776d24d4204SJose E. Roman 
777d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
778d71ae5a4SJacob Faibussowitsch {
779d24d4204SJose E. Roman   PetscFunctionBegin;
7809566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
7819566063dSJacob Faibussowitsch   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
7823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
783d24d4204SJose E. Roman }
784d24d4204SJose E. Roman 
785d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
786d71ae5a4SJacob Faibussowitsch {
787d24d4204SJose E. Roman   PetscFunctionBegin;
788d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
7893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
790d24d4204SJose E. Roman }
791d24d4204SJose E. Roman 
792d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
793d71ae5a4SJacob Faibussowitsch {
794d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
795d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
796d24d4204SJose E. Roman 
797d24d4204SJose E. Roman   PetscFunctionBegin;
798792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
799d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf", info);
800d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
801d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
802d24d4204SJose E. Roman 
8039566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
8049566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
806d24d4204SJose E. Roman }
807d24d4204SJose E. Roman 
808d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
809d71ae5a4SJacob Faibussowitsch {
810d24d4204SJose E. Roman   PetscFunctionBegin;
8119566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8129566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
8133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
814d24d4204SJose E. Roman }
815d24d4204SJose E. Roman 
816d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
817d71ae5a4SJacob Faibussowitsch {
818d24d4204SJose E. Roman   PetscFunctionBegin;
819d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
821d24d4204SJose E. Roman }
822d24d4204SJose E. Roman 
82366976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
824d71ae5a4SJacob Faibussowitsch {
825d24d4204SJose E. Roman   PetscFunctionBegin;
826d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
8273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
828d24d4204SJose E. Roman }
829d24d4204SJose E. Roman 
830d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
831d71ae5a4SJacob Faibussowitsch {
832d24d4204SJose E. Roman   Mat            B;
83359172f18SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
834d24d4204SJose E. Roman 
835d24d4204SJose E. Roman   PetscFunctionBegin;
836d24d4204SJose E. Roman   /* Create the factorization matrix */
8379566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
83866e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
839d24d4204SJose E. Roman   B->factortype      = ftype;
8409566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
8419566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
842d24d4204SJose E. Roman 
8439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
844d24d4204SJose E. Roman   *F = B;
8453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
846d24d4204SJose E. Roman }
847d24d4204SJose E. Roman 
848d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
849d71ae5a4SJacob Faibussowitsch {
850d24d4204SJose E. Roman   PetscFunctionBegin;
8519566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
8529566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
8533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
854d24d4204SJose E. Roman }
855d24d4204SJose E. Roman 
856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
857d71ae5a4SJacob Faibussowitsch {
858d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
859d24d4204SJose E. Roman   PetscBLASInt   one = 1, lwork = 0;
860d24d4204SJose E. Roman   const char    *ntype;
861d68f4f38SPierre Jolivet   PetscScalar   *work = NULL, dummy;
862d24d4204SJose E. Roman 
863d24d4204SJose E. Roman   PetscFunctionBegin;
864d24d4204SJose E. Roman   switch (type) {
865d24d4204SJose E. Roman   case NORM_1:
866d24d4204SJose E. Roman     ntype = "1";
867d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
868d24d4204SJose E. Roman     break;
869d24d4204SJose E. Roman   case NORM_FROBENIUS:
870d24d4204SJose E. Roman     ntype = "F";
871d24d4204SJose E. Roman     work  = &dummy;
872d24d4204SJose E. Roman     break;
873d24d4204SJose E. Roman   case NORM_INFINITY:
874d24d4204SJose E. Roman     ntype = "I";
875d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
876d24d4204SJose E. Roman     break;
877d71ae5a4SJacob Faibussowitsch   default:
878d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
879d24d4204SJose E. Roman   }
8809566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
881d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
8829566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscFree(work));
8833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
884d24d4204SJose E. Roman }
885d24d4204SJose E. Roman 
886d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
887d71ae5a4SJacob Faibussowitsch {
888d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
889d24d4204SJose E. Roman 
890d24d4204SJose E. Roman   PetscFunctionBegin;
8919566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
8923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
893d24d4204SJose E. Roman }
894d24d4204SJose E. Roman 
895d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
896d71ae5a4SJacob Faibussowitsch {
897d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
898d24d4204SJose E. Roman   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
899d24d4204SJose E. Roman 
900d24d4204SJose E. Roman   PetscFunctionBegin;
901d24d4204SJose E. Roman   if (rows) {
902d24d4204SJose E. Roman     n     = a->locr;
903d24d4204SJose E. Roman     nb    = a->mb;
904d24d4204SJose E. Roman     isrc  = a->rsrc;
905d24d4204SJose E. Roman     nproc = a->grid->nprow;
906d24d4204SJose E. Roman     iproc = a->grid->myrow;
9079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
908d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9099566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
910d24d4204SJose E. Roman   }
911d24d4204SJose E. Roman   if (cols) {
912d24d4204SJose E. Roman     n     = a->locc;
913d24d4204SJose E. Roman     nb    = a->nb;
914d24d4204SJose E. Roman     isrc  = a->csrc;
915d24d4204SJose E. Roman     nproc = a->grid->npcol;
916d24d4204SJose E. Roman     iproc = a->grid->mycol;
9179566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
918d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9199566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
920d24d4204SJose E. Roman   }
9213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
922d24d4204SJose E. Roman }
923d24d4204SJose E. Roman 
924d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
925d71ae5a4SJacob Faibussowitsch {
926d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
927d24d4204SJose E. Roman   Mat                Bmpi;
928d24d4204SJose E. Roman   MPI_Comm           comm;
929a00b6df3SJose E. Roman   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
9304b1a79daSJose E. Roman   const PetscInt    *ranges, *branges, *cwork;
9314b1a79daSJose E. Roman   const PetscScalar *vwork;
932d24d4204SJose E. Roman   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
933d24d4204SJose E. Roman   PetscScalar       *barray;
9344b1a79daSJose E. Roman   PetscBool          differ = PETSC_FALSE;
9354b1a79daSJose E. Roman   PetscMPIInt        size;
936d24d4204SJose E. Roman 
937d24d4204SJose E. Roman   PetscFunctionBegin;
9389566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9399566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
9404b1a79daSJose E. Roman 
9414b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9429566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
9439566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
9449371c9d4SSatish Balay     for (i = 0; i < size; i++)
9459371c9d4SSatish Balay       if (ranges[i + 1] != branges[i + 1]) {
9469371c9d4SSatish Balay         differ = PETSC_TRUE;
9479371c9d4SSatish Balay         break;
9489371c9d4SSatish Balay       }
9494b1a79daSJose E. Roman   }
9504b1a79daSJose E. Roman 
9514b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
9529566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
9534b1a79daSJose E. Roman     m = PETSC_DECIDE;
9549566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
9554b1a79daSJose E. Roman     n = PETSC_DECIDE;
9569566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
9579566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
9589566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATDENSE));
9599566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
9604b1a79daSJose E. Roman 
9614b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9629566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
963a00b6df3SJose E. Roman     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
964a00b6df3SJose E. Roman     lld = PetscMax(ldb, 1); /* local leading dimension */
965792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
9664b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
9674b1a79daSJose E. Roman 
9684b1a79daSJose E. Roman     /* redistribute matrix */
9699566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
970792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
9719566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
9729566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
9739566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
9744b1a79daSJose E. Roman 
9754b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
9769566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
9774b1a79daSJose E. Roman     for (i = rstart; i < rend; i++) {
9789566063dSJacob Faibussowitsch       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
9799566063dSJacob Faibussowitsch       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
9809566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
9814b1a79daSJose E. Roman     }
9829566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
9839566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
9849566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Bmpi));
9854b1a79daSJose E. Roman 
9864b1a79daSJose E. Roman   } else { /* normal cases */
987d24d4204SJose E. Roman 
988d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
989d24d4204SJose E. Roman     else {
9909566063dSJacob Faibussowitsch       PetscCall(MatCreate(comm, &Bmpi));
99192c846b4SJose E. Roman       m = PETSC_DECIDE;
9929566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
99392c846b4SJose E. Roman       n = PETSC_DECIDE;
9949566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
9959566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
9969566063dSJacob Faibussowitsch       PetscCall(MatSetType(Bmpi, MATDENSE));
9979566063dSJacob Faibussowitsch       PetscCall(MatSetUp(Bmpi));
998d24d4204SJose E. Roman     }
999d24d4204SJose E. Roman 
1000d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
10019566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1002a00b6df3SJose E. Roman     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1003a00b6df3SJose E. Roman     lld = PetscMax(ldb, 1); /* local leading dimension */
1004792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1005d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1006d24d4204SJose E. Roman 
1007d24d4204SJose E. Roman     /* redistribute matrix */
10089566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
1009792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10109566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1011d24d4204SJose E. Roman 
10129566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10139566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1014d24d4204SJose E. Roman     if (reuse == MAT_INPLACE_MATRIX) {
10159566063dSJacob Faibussowitsch       PetscCall(MatHeaderReplace(A, &Bmpi));
1016d24d4204SJose E. Roman     } else *B = Bmpi;
10174b1a79daSJose E. Roman   }
10183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1019d24d4204SJose E. Roman }
1020d24d4204SJose E. Roman 
1021d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1022d71ae5a4SJacob Faibussowitsch {
1023b12397e7SPierre Jolivet   const PetscInt *ranges;
1024b12397e7SPierre Jolivet   PetscMPIInt     size;
1025b12397e7SPierre Jolivet   PetscInt        i, n;
1026b12397e7SPierre Jolivet 
1027b12397e7SPierre Jolivet   PetscFunctionBegin;
1028b12397e7SPierre Jolivet   *correct = PETSC_TRUE;
1029b12397e7SPierre Jolivet   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1030b12397e7SPierre Jolivet   if (size > 1) {
1031b12397e7SPierre Jolivet     PetscCall(PetscLayoutGetRanges(map, &ranges));
1032b12397e7SPierre Jolivet     n = ranges[1] - ranges[0];
10339371c9d4SSatish Balay     for (i = 1; i < size; i++)
10349371c9d4SSatish Balay       if (ranges[i + 1] - ranges[i] != n) break;
1035b12397e7SPierre Jolivet     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1036b12397e7SPierre Jolivet   }
10373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1038b12397e7SPierre Jolivet }
1039b12397e7SPierre Jolivet 
1040d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1041d71ae5a4SJacob Faibussowitsch {
1042d24d4204SJose E. Roman   Mat_ScaLAPACK     *b;
1043d24d4204SJose E. Roman   Mat                Bmpi;
1044d24d4204SJose E. Roman   MPI_Comm           comm;
104592c846b4SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1046b12397e7SPierre Jolivet   const PetscInt    *ranges, *rows, *cols;
1047d24d4204SJose E. Roman   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1048*c87776d3SJose E. Roman   const PetscScalar *aarray;
1049b12397e7SPierre Jolivet   IS                 ir, ic;
10504e8b52a1SJose E. Roman   PetscInt           lda;
1051b12397e7SPierre Jolivet   PetscBool          flg;
1052d24d4204SJose E. Roman 
1053d24d4204SJose E. Roman   PetscFunctionBegin;
10549566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1055d24d4204SJose E. Roman 
1056d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1057d24d4204SJose E. Roman   else {
10589566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
105992c846b4SJose E. Roman     m = PETSC_DECIDE;
10609566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
106192c846b4SJose E. Roman     n = PETSC_DECIDE;
10629566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10639566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10649566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
10659566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
1066d24d4204SJose E. Roman   }
1067d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)Bmpi->data;
1068d24d4204SJose E. Roman 
1069b12397e7SPierre Jolivet   PetscCall(MatDenseGetLDA(A, &lda));
1070*c87776d3SJose E. Roman   PetscCall(MatDenseGetArrayRead(A, &aarray));
1071b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1072b12397e7SPierre Jolivet   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1073b12397e7SPierre Jolivet   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1074d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for A (1d block distribution) */
10759566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
10769566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
10774e8b52a1SJose E. Roman     lld = PetscMax(lda, 1);                       /* local leading dimension */
1078792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1079d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1080d24d4204SJose E. Roman 
1081d24d4204SJose E. Roman     /* redistribute matrix */
1082792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1083b12397e7SPierre Jolivet     Bmpi->nooffprocentries = PETSC_TRUE;
1084b12397e7SPierre Jolivet   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1085b12397e7SPierre Jolivet     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1086b12397e7SPierre Jolivet     b->roworiented = PETSC_FALSE;
1087b12397e7SPierre Jolivet     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1088b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ir, &rows));
1089b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ic, &cols));
1090b12397e7SPierre Jolivet     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1091b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ir, &rows));
1092b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ic, &cols));
1093b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ic));
1094b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ir));
1095b12397e7SPierre Jolivet   }
1096*c87776d3SJose E. Roman   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
10979566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10989566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1099d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
11009566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &Bmpi));
1101d24d4204SJose E. Roman   } else *B = Bmpi;
11023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1103d24d4204SJose E. Roman }
1104d24d4204SJose E. Roman 
1105d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1106d71ae5a4SJacob Faibussowitsch {
1107d24d4204SJose E. Roman   Mat                mat_scal;
1108d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1109d24d4204SJose E. Roman   const PetscInt    *cols;
1110d24d4204SJose E. Roman   const PetscScalar *vals;
1111d24d4204SJose E. Roman 
1112d24d4204SJose E. Roman   PetscFunctionBegin;
1113d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1114d24d4204SJose E. Roman     mat_scal = *newmat;
11159566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1116d24d4204SJose E. Roman   } else {
11179566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1118d24d4204SJose E. Roman     m = PETSC_DECIDE;
11199566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1120d24d4204SJose E. Roman     n = PETSC_DECIDE;
11219566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
11229566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
11239566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
11249566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1125d24d4204SJose E. Roman   }
1126d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
11279566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11289566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
11299566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1130d24d4204SJose E. Roman   }
11319566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
11329566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1133d24d4204SJose E. Roman 
11349566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1135d24d4204SJose E. Roman   else *newmat = mat_scal;
11363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1137d24d4204SJose E. Roman }
1138d24d4204SJose E. Roman 
1139d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1140d71ae5a4SJacob Faibussowitsch {
1141d24d4204SJose E. Roman   Mat                mat_scal;
1142d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1143d24d4204SJose E. Roman   const PetscInt    *cols;
1144d24d4204SJose E. Roman   const PetscScalar *vals;
1145d24d4204SJose E. Roman   PetscScalar        v;
1146d24d4204SJose E. Roman 
1147d24d4204SJose E. Roman   PetscFunctionBegin;
1148d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1149d24d4204SJose E. Roman     mat_scal = *newmat;
11509566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1151d24d4204SJose E. Roman   } else {
11529566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1153d24d4204SJose E. Roman     m = PETSC_DECIDE;
11549566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1155d24d4204SJose E. Roman     n = PETSC_DECIDE;
11569566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
11579566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
11589566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
11599566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1160d24d4204SJose E. Roman   }
11619566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
1162d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
11639566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11649566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1165d24d4204SJose E. Roman     for (j = 0; j < ncols; j++) { /* lower triangular part */
1166d24d4204SJose E. Roman       if (cols[j] == row) continue;
1167b94d7dedSBarry Smith       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
11689566063dSJacob Faibussowitsch       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1169d24d4204SJose E. Roman     }
11709566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1171d24d4204SJose E. Roman   }
11729566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
11739566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
11749566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1175d24d4204SJose E. Roman 
11769566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1177d24d4204SJose E. Roman   else *newmat = mat_scal;
11783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1179d24d4204SJose E. Roman }
1180d24d4204SJose E. Roman 
1181d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1182d71ae5a4SJacob Faibussowitsch {
1183d24d4204SJose E. Roman   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1184d24d4204SJose E. Roman   PetscInt       sz = 0;
1185d24d4204SJose E. Roman 
1186d24d4204SJose E. Roman   PetscFunctionBegin;
11879566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
11889566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1189d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1190d24d4204SJose E. Roman 
11919566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
11929566063dSJacob Faibussowitsch   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
11939566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(sz, &a->loc));
1194d24d4204SJose E. Roman 
1195d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
11963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1197d24d4204SJose E. Roman }
1198d24d4204SJose E. Roman 
1199d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1200d71ae5a4SJacob Faibussowitsch {
1201d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1202d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1203d24d4204SJose E. Roman   PetscBool           flg;
1204d24d4204SJose E. Roman   MPI_Comm            icomm;
1205d24d4204SJose E. Roman 
1206d24d4204SJose E. Roman   PetscFunctionBegin;
12079566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&A->stash));
12089566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
12099566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->pivots));
12109566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
12119566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1212d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1213d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1214d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1215d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
12169566063dSJacob Faibussowitsch     PetscCall(PetscFree(grid));
12179566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1218d24d4204SJose E. Roman   }
12199566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
12209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
12219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
12229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
12239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
12249566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
12253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1226d24d4204SJose E. Roman }
1227d24d4204SJose E. Roman 
122866976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1229d71ae5a4SJacob Faibussowitsch {
1230d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1231d24d4204SJose E. Roman   PetscBLASInt   info = 0;
1232b12397e7SPierre Jolivet   PetscBool      flg;
1233d24d4204SJose E. Roman 
1234d24d4204SJose E. Roman   PetscFunctionBegin;
12359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12369566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1237d24d4204SJose E. Roman 
1238b12397e7SPierre Jolivet   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1239b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1240b12397e7SPierre Jolivet   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1241b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1242b12397e7SPierre Jolivet   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1243d24d4204SJose E. Roman 
1244d24d4204SJose E. Roman   /* compute local sizes */
12459566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
12469566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1247d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1248d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1249d24d4204SJose E. Roman   a->lld  = PetscMax(1, a->locr);
1250d24d4204SJose E. Roman 
1251d24d4204SJose E. Roman   /* allocate local array */
12529566063dSJacob Faibussowitsch   PetscCall(MatScaLAPACKSetPreallocation(A));
1253d24d4204SJose E. Roman 
1254d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1255792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1256d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
12573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1258d24d4204SJose E. Roman }
1259d24d4204SJose E. Roman 
126066976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1261d71ae5a4SJacob Faibussowitsch {
1262d24d4204SJose E. Roman   PetscInt nstash, reallocs;
1263d24d4204SJose E. Roman 
1264d24d4204SJose E. Roman   PetscFunctionBegin;
12653ba16761SJacob Faibussowitsch   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
12669566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
12679566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
12689566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
12693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1270d24d4204SJose E. Roman }
1271d24d4204SJose E. Roman 
127266976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1273d71ae5a4SJacob Faibussowitsch {
1274d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1275d24d4204SJose E. Roman   PetscMPIInt    n;
1276d24d4204SJose E. Roman   PetscInt       i, flg, *row, *col;
1277d24d4204SJose E. Roman   PetscScalar   *val;
1278d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1279d24d4204SJose E. Roman 
1280d24d4204SJose E. Roman   PetscFunctionBegin;
12813ba16761SJacob Faibussowitsch   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1282d24d4204SJose E. Roman   while (1) {
12839566063dSJacob Faibussowitsch     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1284d24d4204SJose E. Roman     if (!flg) break;
1285d24d4204SJose E. Roman     for (i = 0; i < n; i++) {
12869566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
12879566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1288792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1289aed4548fSBarry Smith       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1290d24d4204SJose E. Roman       switch (A->insertmode) {
1291d71ae5a4SJacob Faibussowitsch       case INSERT_VALUES:
1292d71ae5a4SJacob Faibussowitsch         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1293d71ae5a4SJacob Faibussowitsch         break;
1294d71ae5a4SJacob Faibussowitsch       case ADD_VALUES:
1295d71ae5a4SJacob Faibussowitsch         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1296d71ae5a4SJacob Faibussowitsch         break;
1297d71ae5a4SJacob Faibussowitsch       default:
1298d71ae5a4SJacob Faibussowitsch         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1299d24d4204SJose E. Roman       }
1300d24d4204SJose E. Roman     }
1301d24d4204SJose E. Roman   }
13029566063dSJacob Faibussowitsch   PetscCall(MatStashScatterEnd_Private(&A->stash));
13033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1304d24d4204SJose E. Roman }
1305d24d4204SJose E. Roman 
130666976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1307d71ae5a4SJacob Faibussowitsch {
1308d24d4204SJose E. Roman   Mat      Adense, As;
1309d24d4204SJose E. Roman   MPI_Comm comm;
1310d24d4204SJose E. Roman 
1311d24d4204SJose E. Roman   PetscFunctionBegin;
13129566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
13139566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Adense));
13149566063dSJacob Faibussowitsch   PetscCall(MatSetType(Adense, MATDENSE));
13159566063dSJacob Faibussowitsch   PetscCall(MatLoad(Adense, viewer));
13169566063dSJacob Faibussowitsch   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
13179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
13189566063dSJacob Faibussowitsch   PetscCall(MatHeaderReplace(newMat, &As));
13193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1320d24d4204SJose E. Roman }
1321d24d4204SJose E. Roman 
13229371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1323d24d4204SJose E. Roman                                        0,
1324d24d4204SJose E. Roman                                        0,
1325d24d4204SJose E. Roman                                        MatMult_ScaLAPACK,
1326d24d4204SJose E. Roman                                        /* 4*/ MatMultAdd_ScaLAPACK,
1327d24d4204SJose E. Roman                                        MatMultTranspose_ScaLAPACK,
1328d24d4204SJose E. Roman                                        MatMultTransposeAdd_ScaLAPACK,
1329d24d4204SJose E. Roman                                        MatSolve_ScaLAPACK,
1330d24d4204SJose E. Roman                                        MatSolveAdd_ScaLAPACK,
1331d24d4204SJose E. Roman                                        0,
1332d24d4204SJose E. Roman                                        /*10*/ 0,
1333d24d4204SJose E. Roman                                        MatLUFactor_ScaLAPACK,
1334d24d4204SJose E. Roman                                        MatCholeskyFactor_ScaLAPACK,
1335d24d4204SJose E. Roman                                        0,
1336d24d4204SJose E. Roman                                        MatTranspose_ScaLAPACK,
1337d24d4204SJose E. Roman                                        /*15*/ MatGetInfo_ScaLAPACK,
1338d24d4204SJose E. Roman                                        0,
1339d24d4204SJose E. Roman                                        MatGetDiagonal_ScaLAPACK,
1340d24d4204SJose E. Roman                                        MatDiagonalScale_ScaLAPACK,
1341d24d4204SJose E. Roman                                        MatNorm_ScaLAPACK,
1342d24d4204SJose E. Roman                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1343d24d4204SJose E. Roman                                        MatAssemblyEnd_ScaLAPACK,
1344d24d4204SJose E. Roman                                        MatSetOption_ScaLAPACK,
1345d24d4204SJose E. Roman                                        MatZeroEntries_ScaLAPACK,
1346d24d4204SJose E. Roman                                        /*24*/ 0,
1347d24d4204SJose E. Roman                                        MatLUFactorSymbolic_ScaLAPACK,
1348d24d4204SJose E. Roman                                        MatLUFactorNumeric_ScaLAPACK,
1349d24d4204SJose E. Roman                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1350d24d4204SJose E. Roman                                        MatCholeskyFactorNumeric_ScaLAPACK,
1351d24d4204SJose E. Roman                                        /*29*/ MatSetUp_ScaLAPACK,
1352d24d4204SJose E. Roman                                        0,
1353d24d4204SJose E. Roman                                        0,
1354d24d4204SJose E. Roman                                        0,
1355d24d4204SJose E. Roman                                        0,
1356d24d4204SJose E. Roman                                        /*34*/ MatDuplicate_ScaLAPACK,
1357d24d4204SJose E. Roman                                        0,
1358d24d4204SJose E. Roman                                        0,
1359d24d4204SJose E. Roman                                        0,
1360d24d4204SJose E. Roman                                        0,
1361d24d4204SJose E. Roman                                        /*39*/ MatAXPY_ScaLAPACK,
1362d24d4204SJose E. Roman                                        0,
1363d24d4204SJose E. Roman                                        0,
1364d24d4204SJose E. Roman                                        0,
1365d24d4204SJose E. Roman                                        MatCopy_ScaLAPACK,
1366d24d4204SJose E. Roman                                        /*44*/ 0,
1367d24d4204SJose E. Roman                                        MatScale_ScaLAPACK,
1368d24d4204SJose E. Roman                                        MatShift_ScaLAPACK,
1369d24d4204SJose E. Roman                                        0,
1370d24d4204SJose E. Roman                                        0,
1371d24d4204SJose E. Roman                                        /*49*/ 0,
1372d24d4204SJose E. Roman                                        0,
1373d24d4204SJose E. Roman                                        0,
1374d24d4204SJose E. Roman                                        0,
1375d24d4204SJose E. Roman                                        0,
1376d24d4204SJose E. Roman                                        /*54*/ 0,
1377d24d4204SJose E. Roman                                        0,
1378d24d4204SJose E. Roman                                        0,
1379d24d4204SJose E. Roman                                        0,
1380d24d4204SJose E. Roman                                        0,
1381d24d4204SJose E. Roman                                        /*59*/ 0,
1382d24d4204SJose E. Roman                                        MatDestroy_ScaLAPACK,
1383d24d4204SJose E. Roman                                        MatView_ScaLAPACK,
1384d24d4204SJose E. Roman                                        0,
1385d24d4204SJose E. Roman                                        0,
1386d24d4204SJose E. Roman                                        /*64*/ 0,
1387d24d4204SJose E. Roman                                        0,
1388d24d4204SJose E. Roman                                        0,
1389d24d4204SJose E. Roman                                        0,
1390d24d4204SJose E. Roman                                        0,
1391d24d4204SJose E. Roman                                        /*69*/ 0,
1392d24d4204SJose E. Roman                                        0,
1393d24d4204SJose E. Roman                                        MatConvert_ScaLAPACK_Dense,
1394d24d4204SJose E. Roman                                        0,
1395d24d4204SJose E. Roman                                        0,
1396d24d4204SJose E. Roman                                        /*74*/ 0,
1397d24d4204SJose E. Roman                                        0,
1398d24d4204SJose E. Roman                                        0,
1399d24d4204SJose E. Roman                                        0,
1400d24d4204SJose E. Roman                                        0,
1401d24d4204SJose E. Roman                                        /*79*/ 0,
1402d24d4204SJose E. Roman                                        0,
1403d24d4204SJose E. Roman                                        0,
1404d24d4204SJose E. Roman                                        0,
1405d24d4204SJose E. Roman                                        MatLoad_ScaLAPACK,
1406d24d4204SJose E. Roman                                        /*84*/ 0,
1407d24d4204SJose E. Roman                                        0,
1408d24d4204SJose E. Roman                                        0,
1409d24d4204SJose E. Roman                                        0,
1410d24d4204SJose E. Roman                                        0,
1411d24d4204SJose E. Roman                                        /*89*/ 0,
1412d24d4204SJose E. Roman                                        0,
1413d24d4204SJose E. Roman                                        MatMatMultNumeric_ScaLAPACK,
1414d24d4204SJose E. Roman                                        0,
1415d24d4204SJose E. Roman                                        0,
1416d24d4204SJose E. Roman                                        /*94*/ 0,
1417d24d4204SJose E. Roman                                        0,
1418d24d4204SJose E. Roman                                        0,
1419d24d4204SJose E. Roman                                        MatMatTransposeMultNumeric_ScaLAPACK,
1420d24d4204SJose E. Roman                                        0,
1421d24d4204SJose E. Roman                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1422d24d4204SJose E. Roman                                        0,
1423d24d4204SJose E. Roman                                        0,
1424d24d4204SJose E. Roman                                        MatConjugate_ScaLAPACK,
1425d24d4204SJose E. Roman                                        0,
1426d24d4204SJose E. Roman                                        /*104*/ 0,
1427d24d4204SJose E. Roman                                        0,
1428d24d4204SJose E. Roman                                        0,
1429d24d4204SJose E. Roman                                        0,
1430d24d4204SJose E. Roman                                        0,
1431d24d4204SJose E. Roman                                        /*109*/ MatMatSolve_ScaLAPACK,
1432d24d4204SJose E. Roman                                        0,
1433d24d4204SJose E. Roman                                        0,
1434d24d4204SJose E. Roman                                        0,
1435d24d4204SJose E. Roman                                        MatMissingDiagonal_ScaLAPACK,
1436d24d4204SJose E. Roman                                        /*114*/ 0,
1437d24d4204SJose E. Roman                                        0,
1438d24d4204SJose E. Roman                                        0,
1439d24d4204SJose E. Roman                                        0,
1440d24d4204SJose E. Roman                                        0,
1441d24d4204SJose E. Roman                                        /*119*/ 0,
1442d24d4204SJose E. Roman                                        MatHermitianTranspose_ScaLAPACK,
1443d24d4204SJose E. Roman                                        0,
1444d24d4204SJose E. Roman                                        0,
1445d24d4204SJose E. Roman                                        0,
1446d24d4204SJose E. Roman                                        /*124*/ 0,
1447d24d4204SJose E. Roman                                        0,
1448d24d4204SJose E. Roman                                        0,
1449d24d4204SJose E. Roman                                        0,
1450d24d4204SJose E. Roman                                        0,
1451d24d4204SJose E. Roman                                        /*129*/ 0,
1452d24d4204SJose E. Roman                                        0,
1453d24d4204SJose E. Roman                                        0,
1454d24d4204SJose E. Roman                                        0,
1455d24d4204SJose E. Roman                                        0,
1456d24d4204SJose E. Roman                                        /*134*/ 0,
1457d24d4204SJose E. Roman                                        0,
1458d24d4204SJose E. Roman                                        0,
1459d24d4204SJose E. Roman                                        0,
1460d24d4204SJose E. Roman                                        0,
1461d24d4204SJose E. Roman                                        0,
1462d24d4204SJose E. Roman                                        /*140*/ 0,
1463d24d4204SJose E. Roman                                        0,
1464d24d4204SJose E. Roman                                        0,
1465d24d4204SJose E. Roman                                        0,
1466d24d4204SJose E. Roman                                        0,
1467d24d4204SJose E. Roman                                        /*145*/ 0,
1468d24d4204SJose E. Roman                                        0,
146999a7f59eSMark Adams                                        0,
147099a7f59eSMark Adams                                        0,
14717fb60732SBarry Smith                                        0,
1472ed6168d8SPierre Jolivet                                        /*150*/ 0,
1473ed6168d8SPierre Jolivet                                        0};
1474d24d4204SJose E. Roman 
1475d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1476d71ae5a4SJacob Faibussowitsch {
1477d24d4204SJose E. Roman   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1478d24d4204SJose E. Roman   PetscInt           size = stash->size, nsends;
1479d24d4204SJose E. Roman   PetscInt           count, *sindices, **rindices, i, j, l;
1480d24d4204SJose E. Roman   PetscScalar      **rvalues, *svalues;
1481d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1482d24d4204SJose E. Roman   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1483d24d4204SJose E. Roman   PetscMPIInt       *sizes, *nlengths, nreceives;
1484d24d4204SJose E. Roman   PetscInt          *sp_idx, *sp_idy;
1485d24d4204SJose E. Roman   PetscScalar       *sp_val;
1486d24d4204SJose E. Roman   PetscMatStashSpace space, space_next;
1487d24d4204SJose E. Roman   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1488d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1489d24d4204SJose E. Roman 
1490d24d4204SJose E. Roman   PetscFunctionBegin;
1491d24d4204SJose E. Roman   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1492d24d4204SJose E. Roman     InsertMode addv;
14931c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
149408401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1495d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1496d24d4204SJose E. Roman   }
1497d24d4204SJose E. Roman 
1498d24d4204SJose E. Roman   bs2 = stash->bs * stash->bs;
1499d24d4204SJose E. Roman 
1500d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
15019566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
15029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1503d24d4204SJose E. Roman 
1504d24d4204SJose E. Roman   i = j = 0;
1505d24d4204SJose E. Roman   space = stash->space_head;
1506d24d4204SJose E. Roman   while (space) {
1507d24d4204SJose E. Roman     space_next = space->next;
1508d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
15099566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
15109566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1511792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1512d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
15139371c9d4SSatish Balay       nlengths[j]++;
15149371c9d4SSatish Balay       owner[i] = j;
1515d24d4204SJose E. Roman       i++;
1516d24d4204SJose E. Roman     }
1517d24d4204SJose E. Roman     space = space_next;
1518d24d4204SJose E. Roman   }
1519d24d4204SJose E. Roman 
1520d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
15219566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
1522d24d4204SJose E. Roman   for (i = 0, nsends = 0; i < size; i++) {
1523d24d4204SJose E. Roman     if (nlengths[i]) {
15249371c9d4SSatish Balay       sizes[i] = 1;
15259371c9d4SSatish Balay       nsends++;
1526d24d4204SJose E. Roman     }
1527d24d4204SJose E. Roman   }
1528d24d4204SJose E. Roman 
15299371c9d4SSatish Balay   {
15309371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
1531d24d4204SJose E. Roman     /* Determine the number of messages to expect, their lengths, from from-ids */
15329566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
15339566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1534d24d4204SJose E. Roman     /* since clubbing row,col - lengths are multiplied by 2 */
1535d24d4204SJose E. Roman     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
15369566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1537d24d4204SJose E. Roman     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1538d24d4204SJose E. Roman     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
15399566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
15409566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
15419371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
15429371c9d4SSatish Balay   }
1543d24d4204SJose E. Roman 
1544d24d4204SJose E. Roman   /* do sends:
1545d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1546d24d4204SJose E. Roman          the ith processor
1547d24d4204SJose E. Roman   */
15489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
15499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
15509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1551d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
15529371c9d4SSatish Balay   startv[0] = 0;
15539371c9d4SSatish Balay   starti[0] = 0;
1554d24d4204SJose E. Roman   for (i = 1; i < size; i++) {
1555d24d4204SJose E. Roman     startv[i] = startv[i - 1] + nlengths[i - 1];
1556d24d4204SJose E. Roman     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1557d24d4204SJose E. Roman   }
1558d24d4204SJose E. Roman 
1559d24d4204SJose E. Roman   i     = 0;
1560d24d4204SJose E. Roman   space = stash->space_head;
1561d24d4204SJose E. Roman   while (space) {
1562d24d4204SJose E. Roman     space_next = space->next;
1563d24d4204SJose E. Roman     sp_idx     = space->idx;
1564d24d4204SJose E. Roman     sp_idy     = space->idy;
1565d24d4204SJose E. Roman     sp_val     = space->val;
1566d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
1567d24d4204SJose E. Roman       j = owner[i];
1568d24d4204SJose E. Roman       if (bs2 == 1) {
1569d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1570d24d4204SJose E. Roman       } else {
1571d24d4204SJose E. Roman         PetscInt     k;
1572d24d4204SJose E. Roman         PetscScalar *buf1, *buf2;
1573d24d4204SJose E. Roman         buf1 = svalues + bs2 * startv[j];
1574d24d4204SJose E. Roman         buf2 = space->val + bs2 * l;
1575d24d4204SJose E. Roman         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1576d24d4204SJose E. Roman       }
1577d24d4204SJose E. Roman       sindices[starti[j]]               = sp_idx[l];
1578d24d4204SJose E. Roman       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1579d24d4204SJose E. Roman       startv[j]++;
1580d24d4204SJose E. Roman       starti[j]++;
1581d24d4204SJose E. Roman       i++;
1582d24d4204SJose E. Roman     }
1583d24d4204SJose E. Roman     space = space_next;
1584d24d4204SJose E. Roman   }
1585d24d4204SJose E. Roman   startv[0] = 0;
1586d24d4204SJose E. Roman   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1587d24d4204SJose E. Roman 
1588d24d4204SJose E. Roman   for (i = 0, count = 0; i < size; i++) {
1589d24d4204SJose E. Roman     if (sizes[i]) {
15909566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
15919566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1592d24d4204SJose E. Roman     }
1593d24d4204SJose E. Roman   }
1594d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
15959566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1596d24d4204SJose E. Roman   for (i = 0; i < size; i++) {
159748a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1598d24d4204SJose E. Roman   }
1599d24d4204SJose E. Roman #endif
16009566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
16019566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
16029566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
16039566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
1604d24d4204SJose E. Roman 
1605d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
16069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1607d24d4204SJose E. Roman 
1608d24d4204SJose E. Roman   for (i = 0; i < nreceives; i++) {
1609d24d4204SJose E. Roman     recv_waits[2 * i]     = recv_waits1[i];
1610d24d4204SJose E. Roman     recv_waits[2 * i + 1] = recv_waits2[i];
1611d24d4204SJose E. Roman   }
1612d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1613d24d4204SJose E. Roman 
16149566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
16159566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
1616d24d4204SJose E. Roman 
1617d24d4204SJose E. Roman   stash->svalues         = svalues;
1618d24d4204SJose E. Roman   stash->sindices        = sindices;
1619d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1620d24d4204SJose E. Roman   stash->rindices        = rindices;
1621d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1622d24d4204SJose E. Roman   stash->nsends          = nsends;
1623d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1624d24d4204SJose E. Roman   stash->reproduce_count = 0;
16253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1626d24d4204SJose E. Roman }
1627d24d4204SJose E. Roman 
1628d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1629d71ae5a4SJacob Faibussowitsch {
1630d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1631d24d4204SJose E. Roman 
1632d24d4204SJose E. Roman   PetscFunctionBegin;
163328b400f6SJacob Faibussowitsch   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1634aed4548fSBarry Smith   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1635aed4548fSBarry Smith   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
16369566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
16379566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
16383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1639d24d4204SJose E. Roman }
1640d24d4204SJose E. Roman 
1641d24d4204SJose E. Roman /*@
16426aad120cSJose E. Roman   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
164311a5261eSBarry Smith   the `MATSCALAPACK` matrix
1644d24d4204SJose E. Roman 
1645c3339decSBarry Smith   Logically Collective
1646d24d4204SJose E. Roman 
1647d8d19677SJose E. Roman   Input Parameters:
164811a5261eSBarry Smith + A  - a `MATSCALAPACK` matrix
1649d24d4204SJose E. Roman . mb - the row block size
1650d24d4204SJose E. Roman - nb - the column block size
1651d24d4204SJose E. Roman 
1652d24d4204SJose E. Roman   Level: intermediate
1653d24d4204SJose E. Roman 
16542ef1f0ffSBarry Smith   Note:
16552ef1f0ffSBarry Smith   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
16562ef1f0ffSBarry Smith 
16571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1658d24d4204SJose E. Roman @*/
1659d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1660d71ae5a4SJacob Faibussowitsch {
1661d24d4204SJose E. Roman   PetscFunctionBegin;
1662d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1663d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, mb, 2);
1664d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, nb, 3);
1665cac4c232SBarry Smith   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
16663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1667d24d4204SJose E. Roman }
1668d24d4204SJose E. Roman 
1669d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1670d71ae5a4SJacob Faibussowitsch {
1671d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1672d24d4204SJose E. Roman 
1673d24d4204SJose E. Roman   PetscFunctionBegin;
1674d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1675d24d4204SJose E. Roman   if (nb) *nb = a->nb;
16763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1677d24d4204SJose E. Roman }
1678d24d4204SJose E. Roman 
1679d24d4204SJose E. Roman /*@
16806aad120cSJose E. Roman   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
168111a5261eSBarry Smith   the `MATSCALAPACK` matrix
1682d24d4204SJose E. Roman 
16832ef1f0ffSBarry Smith   Not Collective
1684d24d4204SJose E. Roman 
1685d24d4204SJose E. Roman   Input Parameter:
168611a5261eSBarry Smith . A - a `MATSCALAPACK` matrix
1687d24d4204SJose E. Roman 
1688d24d4204SJose E. Roman   Output Parameters:
1689d24d4204SJose E. Roman + mb - the row block size
1690d24d4204SJose E. Roman - nb - the column block size
1691d24d4204SJose E. Roman 
1692d24d4204SJose E. Roman   Level: intermediate
1693d24d4204SJose E. Roman 
16942ef1f0ffSBarry Smith   Note:
16952ef1f0ffSBarry Smith   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
16962ef1f0ffSBarry Smith 
16971cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1698d24d4204SJose E. Roman @*/
1699d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1700d71ae5a4SJacob Faibussowitsch {
1701d24d4204SJose E. Roman   PetscFunctionBegin;
1702d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1703cac4c232SBarry Smith   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
17043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1705d24d4204SJose E. Roman }
1706d24d4204SJose E. Roman 
1707d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1708d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1709d24d4204SJose E. Roman 
1710d24d4204SJose E. Roman /*MC
1711d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1712d24d4204SJose E. Roman 
17132ef1f0ffSBarry Smith    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1714d24d4204SJose E. Roman 
1715d24d4204SJose E. Roman    Options Database Keys:
17162ef1f0ffSBarry Smith +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
17172ef1f0ffSBarry Smith .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1718d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1719d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1720d24d4204SJose E. Roman 
17212ef1f0ffSBarry Smith    Level: intermediate
17222ef1f0ffSBarry Smith 
172389bba20eSBarry Smith   Note:
172489bba20eSBarry Smith    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
172589bba20eSBarry Smith    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
172689bba20eSBarry Smith    the given rank.
172789bba20eSBarry Smith 
17281cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1729d24d4204SJose E. Roman M*/
1730d24d4204SJose E. Roman 
1731d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1732d71ae5a4SJacob Faibussowitsch {
1733d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1734d24d4204SJose E. Roman   PetscBool           flg, flg1;
1735d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1736d24d4204SJose E. Roman   MPI_Comm            icomm;
1737d24d4204SJose E. Roman   PetscBLASInt        nprow, npcol, myrow, mycol;
1738d24d4204SJose E. Roman   PetscInt            optv1, k = 2, array[2] = {0, 0};
1739d24d4204SJose E. Roman   PetscMPIInt         size;
1740d24d4204SJose E. Roman 
1741d24d4204SJose E. Roman   PetscFunctionBegin;
1742aea10558SJacob Faibussowitsch   A->ops[0]     = MatOps_Values;
1743d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1744d24d4204SJose E. Roman 
17459566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1746d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1747d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1748d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1749d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1750d24d4204SJose E. Roman 
17514dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&a));
1752d24d4204SJose E. Roman   A->data = (void *)a;
1753d24d4204SJose E. Roman 
1754d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1755d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
17569566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
17579566063dSJacob Faibussowitsch     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
17589566063dSJacob Faibussowitsch     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1759d24d4204SJose E. Roman   }
17609566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
17619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1762d24d4204SJose E. Roman   if (!flg) {
17634dfa11a4SJacob Faibussowitsch     PetscCall(PetscNew(&grid));
1764d24d4204SJose E. Roman 
17659566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(icomm, &size));
1766d24d4204SJose E. Roman     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1767d24d4204SJose E. Roman 
1768d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
17699566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1770d24d4204SJose E. Roman     if (flg1) {
177108401ef6SPierre Jolivet       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1772d24d4204SJose E. Roman       grid->nprow = optv1;
1773d24d4204SJose E. Roman     }
1774d0609cedSBarry Smith     PetscOptionsEnd();
1775d24d4204SJose E. Roman 
1776d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1777d24d4204SJose E. Roman     grid->npcol = size / grid->nprow;
17789566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
17799566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1780f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1781d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1782d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1783d24d4204SJose E. Roman     grid->grid_refct = 1;
1784d24d4204SJose E. Roman     grid->nprow      = nprow;
1785d24d4204SJose E. Roman     grid->npcol      = npcol;
1786d24d4204SJose E. Roman     grid->myrow      = myrow;
1787d24d4204SJose E. Roman     grid->mycol      = mycol;
1788d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1789f7ec113fSDamian Marek     grid->ictxrow = Csys2blacs_handle(icomm);
1790d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1791f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1792d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
17939566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1794d24d4204SJose E. Roman 
1795d24d4204SJose E. Roman   } else grid->grid_refct++;
17969566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
1797d24d4204SJose E. Roman   a->grid = grid;
1798d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1799d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1800d24d4204SJose E. Roman 
1801d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
18029566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1803d24d4204SJose E. Roman   if (flg) {
1804d24d4204SJose E. Roman     a->mb = array[0];
1805d24d4204SJose E. Roman     a->nb = (k > 1) ? array[1] : a->mb;
1806d24d4204SJose E. Roman   }
1807d0609cedSBarry Smith   PetscOptionsEnd();
1808d24d4204SJose E. Roman 
1809b12397e7SPierre Jolivet   a->roworiented = PETSC_TRUE;
18109566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
18119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
18129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
18139566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
18143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1815d24d4204SJose E. Roman }
1816d24d4204SJose E. Roman 
1817d24d4204SJose E. Roman /*@C
1818d24d4204SJose E. Roman   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
181911a5261eSBarry Smith   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1820d24d4204SJose E. Roman 
1821d24d4204SJose E. Roman   Collective
1822d24d4204SJose E. Roman 
1823d24d4204SJose E. Roman   Input Parameters:
1824d24d4204SJose E. Roman + comm - MPI communicator
182511a5261eSBarry Smith . mb   - row block size (or `PETSC_DECIDE` to have it set)
182611a5261eSBarry Smith . nb   - column block size (or `PETSC_DECIDE` to have it set)
1827d24d4204SJose E. Roman . M    - number of global rows
1828d24d4204SJose E. Roman . N    - number of global columns
1829d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix
1830d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix
1831d24d4204SJose E. Roman 
1832d24d4204SJose E. Roman   Output Parameter:
1833d24d4204SJose E. Roman . A - the matrix
1834d24d4204SJose E. Roman 
183511a5261eSBarry Smith   Options Database Key:
1836d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1837d24d4204SJose E. Roman 
18382ef1f0ffSBarry Smith   Level: intermediate
18392ef1f0ffSBarry Smith 
18402ef1f0ffSBarry Smith   Notes:
18412ef1f0ffSBarry Smith   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
18422ef1f0ffSBarry Smith 
184311a5261eSBarry Smith   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1844d24d4204SJose E. Roman   MatXXXXSetPreallocation() paradigm instead of this routine directly.
184511a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1846d24d4204SJose E. Roman 
1847d24d4204SJose E. Roman   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1848d24d4204SJose E. Roman   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1849d24d4204SJose E. Roman   significance and are thus ignored. The block sizes refer to the values
185011a5261eSBarry Smith   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1851d24d4204SJose E. Roman 
18521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1853d24d4204SJose E. Roman @*/
1854d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1855d71ae5a4SJacob Faibussowitsch {
1856d24d4204SJose E. Roman   Mat_ScaLAPACK *a;
1857d24d4204SJose E. Roman   PetscInt       m, n;
1858d24d4204SJose E. Roman 
1859d24d4204SJose E. Roman   PetscFunctionBegin;
18609566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
18619566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSCALAPACK));
1862aed4548fSBarry Smith   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1863d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1864d24d4204SJose E. Roman   m = PETSC_DECIDE;
18659566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1866d24d4204SJose E. Roman   n = PETSC_DECIDE;
18679566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
18689566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1869d24d4204SJose E. Roman   a = (Mat_ScaLAPACK *)(*A)->data;
18709566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(M, &a->M));
18719566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(N, &a->N));
18729566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
18739566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
18749566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
18759566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
18769566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*A));
18773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1878d24d4204SJose E. Roman }
1879