xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
327e75052SPierre Jolivet const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet                                        "       PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet                                        "       ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet                                        "       YEAR = 1997\n"
1127e75052SPierre Jolivet                                        "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite       = PETSC_FALSE;
1327e75052SPierre Jolivet 
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman 
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman 
229371c9d4SSatish Balay static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) {
23f7ec113fSDamian Marek   PetscFunctionBegin;
249566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
259566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
26f7ec113fSDamian Marek   PetscFunctionReturn(0);
27f7ec113fSDamian Marek }
28f7ec113fSDamian Marek 
299371c9d4SSatish Balay static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) {
30d24d4204SJose E. Roman   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
31d24d4204SJose E. Roman   PetscBool         iascii;
32d24d4204SJose E. Roman   PetscViewerFormat format;
33d24d4204SJose E. Roman   Mat               Adense;
34d24d4204SJose E. Roman 
35d24d4204SJose E. Roman   PetscFunctionBegin;
369566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
37d24d4204SJose E. Roman   if (iascii) {
389566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
39d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
409566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
419566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
429566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
439566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
44d24d4204SJose E. Roman       PetscFunctionReturn(0);
45d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
46d24d4204SJose E. Roman       PetscFunctionReturn(0);
47d24d4204SJose E. Roman     }
48d24d4204SJose E. Roman   }
49d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
509566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
519566063dSJacob Faibussowitsch   PetscCall(MatView(Adense, viewer));
529566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
53d24d4204SJose E. Roman   PetscFunctionReturn(0);
54d24d4204SJose E. Roman }
55d24d4204SJose E. Roman 
569371c9d4SSatish Balay static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) {
57d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
58d24d4204SJose E. Roman   PetscLogDouble isend[2], irecv[2];
59d24d4204SJose E. Roman 
60d24d4204SJose E. Roman   PetscFunctionBegin;
61d24d4204SJose E. Roman   info->block_size = 1.0;
62d24d4204SJose E. Roman 
63d24d4204SJose E. Roman   isend[0] = a->lld * a->locc;  /* locally allocated */
64d24d4204SJose E. Roman   isend[1] = a->locr * a->locc; /* used submatrix */
65d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
66d24d4204SJose E. Roman     info->nz_allocated = isend[0];
67d24d4204SJose E. Roman     info->nz_used      = isend[1];
68d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
6957168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
70d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
71d24d4204SJose E. Roman     info->nz_used      = irecv[1];
72d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
7357168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
74d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
75d24d4204SJose E. Roman     info->nz_used      = irecv[1];
76d24d4204SJose E. Roman   }
77d24d4204SJose E. Roman 
78d24d4204SJose E. Roman   info->nz_unneeded       = 0;
79d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
80d24d4204SJose E. Roman   info->mallocs           = 0;
81d24d4204SJose E. Roman   info->memory            = ((PetscObject)A)->mem;
82d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
83d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
84d24d4204SJose E. Roman   info->factor_mallocs    = 0;
85d24d4204SJose E. Roman   PetscFunctionReturn(0);
86d24d4204SJose E. Roman }
87d24d4204SJose E. Roman 
889371c9d4SSatish Balay PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) {
89b12397e7SPierre Jolivet   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
90b12397e7SPierre Jolivet 
91d24d4204SJose E. Roman   PetscFunctionBegin;
92d24d4204SJose E. Roman   switch (op) {
93d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATIONS:
94d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATION_ERR:
95d24d4204SJose E. Roman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
96d24d4204SJose E. Roman   case MAT_SYMMETRIC:
97d24d4204SJose E. Roman   case MAT_SORTED_FULL:
989371c9d4SSatish Balay   case MAT_HERMITIAN: break;
999371c9d4SSatish Balay   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
1009371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
101d24d4204SJose E. Roman   }
102d24d4204SJose E. Roman   PetscFunctionReturn(0);
103d24d4204SJose E. Roman }
104d24d4204SJose E. Roman 
1059371c9d4SSatish Balay static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) {
106d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
107d24d4204SJose E. Roman   PetscInt       i, j;
108d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
109b12397e7SPierre Jolivet   PetscBool      roworiented = a->roworiented;
110d24d4204SJose E. Roman 
111d24d4204SJose E. Roman   PetscFunctionBegin;
112b12397e7SPierre Jolivet   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
113d24d4204SJose E. Roman   for (i = 0; i < nr; i++) {
114d24d4204SJose E. Roman     if (rows[i] < 0) continue;
1159566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
116d24d4204SJose E. Roman     for (j = 0; j < nc; j++) {
117d24d4204SJose E. Roman       if (cols[j] < 0) continue;
1189566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
119792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
120d24d4204SJose E. Roman       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
121b12397e7SPierre Jolivet         if (roworiented) {
122d24d4204SJose E. Roman           switch (imode) {
123d24d4204SJose E. Roman           case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break;
124b12397e7SPierre Jolivet           default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break;
125b12397e7SPierre Jolivet           }
126b12397e7SPierre Jolivet         } else {
127b12397e7SPierre Jolivet           switch (imode) {
128b12397e7SPierre Jolivet           case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break;
129b12397e7SPierre Jolivet           default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break;
130b12397e7SPierre Jolivet           }
131d24d4204SJose E. Roman         }
132d24d4204SJose E. Roman       } else {
13328b400f6SJacob Faibussowitsch         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
134d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
135b12397e7SPierre Jolivet         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
136d24d4204SJose E. Roman       }
137d24d4204SJose E. Roman     }
138d24d4204SJose E. Roman   }
139d24d4204SJose E. Roman   PetscFunctionReturn(0);
140d24d4204SJose E. Roman }
141d24d4204SJose E. Roman 
1429371c9d4SSatish Balay static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) {
143d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
144d24d4204SJose E. Roman   PetscScalar    *x2d, *y2d, alpha = 1.0;
145d24d4204SJose E. Roman   const PetscInt *ranges;
146d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
147d24d4204SJose E. Roman 
148d24d4204SJose E. Roman   PetscFunctionBegin;
149d24d4204SJose E. Roman   if (transpose) {
150d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1519566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1529566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
153d24d4204SJose E. Roman     xlld = PetscMax(1, A->rmap->n);
154792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
155d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1569566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
1579566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
158d24d4204SJose E. Roman     ylld = 1;
159792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
160d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
161d24d4204SJose E. Roman 
162d24d4204SJose E. Roman     /* allocate 2d vectors */
163d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
164d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
166d24d4204SJose E. Roman     xlld = PetscMax(1, lszx);
167d24d4204SJose E. Roman 
168d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
169792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
170d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
171792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
172d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
173d24d4204SJose E. Roman 
174d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
175792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
176d24d4204SJose E. Roman 
177d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
178792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
179d24d4204SJose E. Roman 
180d24d4204SJose E. Roman     /* call PBLAS subroutine */
181792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
182d24d4204SJose E. Roman 
183d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
184792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
185d24d4204SJose E. Roman 
186d24d4204SJose E. Roman   } else { /* non-transpose */
187d24d4204SJose E. Roman 
188d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1899566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
1909566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
191d24d4204SJose E. Roman     xlld = 1;
192792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
193d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1949566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1959566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
196d24d4204SJose E. Roman     ylld = PetscMax(1, A->rmap->n);
197792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
198d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
199d24d4204SJose E. Roman 
200d24d4204SJose E. Roman     /* allocate 2d vectors */
201d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
202d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
2039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
204d24d4204SJose E. Roman     ylld = PetscMax(1, lszy);
205d24d4204SJose E. Roman 
206d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
207792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
208d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
209792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
210d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
211d24d4204SJose E. Roman 
212d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
213792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
214d24d4204SJose E. Roman 
215d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
216792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
217d24d4204SJose E. Roman 
218d24d4204SJose E. Roman     /* call PBLAS subroutine */
219792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
220d24d4204SJose E. Roman 
221d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
222792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
223d24d4204SJose E. Roman   }
2249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(x2d, y2d));
225d24d4204SJose E. Roman   PetscFunctionReturn(0);
226d24d4204SJose E. Roman }
227d24d4204SJose E. Roman 
2289371c9d4SSatish Balay static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) {
229d24d4204SJose E. Roman   const PetscScalar *xarray;
230d24d4204SJose E. Roman   PetscScalar       *yarray;
231d24d4204SJose E. Roman 
232d24d4204SJose E. Roman   PetscFunctionBegin;
2339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2349566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
2359566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
2369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
238d24d4204SJose E. Roman   PetscFunctionReturn(0);
239d24d4204SJose E. Roman }
240d24d4204SJose E. Roman 
2419371c9d4SSatish Balay static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) {
242d24d4204SJose E. Roman   const PetscScalar *xarray;
243d24d4204SJose E. Roman   PetscScalar       *yarray;
244d24d4204SJose E. Roman 
245d24d4204SJose E. Roman   PetscFunctionBegin;
2469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
2489566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
2499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
251d24d4204SJose E. Roman   PetscFunctionReturn(0);
252d24d4204SJose E. Roman }
253d24d4204SJose E. Roman 
2549371c9d4SSatish Balay static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
255d24d4204SJose E. Roman   const PetscScalar *xarray;
256d24d4204SJose E. Roman   PetscScalar       *zarray;
257d24d4204SJose E. Roman 
258d24d4204SJose E. Roman   PetscFunctionBegin;
2599566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
2609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2619566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
2629566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
2639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
265d24d4204SJose E. Roman   PetscFunctionReturn(0);
266d24d4204SJose E. Roman }
267d24d4204SJose E. Roman 
2689371c9d4SSatish Balay static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
269d24d4204SJose E. Roman   const PetscScalar *xarray;
270d24d4204SJose E. Roman   PetscScalar       *zarray;
271d24d4204SJose E. Roman 
272d24d4204SJose E. Roman   PetscFunctionBegin;
2739566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
2749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
2769566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
2779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
279d24d4204SJose E. Roman   PetscFunctionReturn(0);
280d24d4204SJose E. Roman }
281d24d4204SJose E. Roman 
2829371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
283d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
284d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
285d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
286d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
287d24d4204SJose E. Roman   PetscBLASInt   one = 1;
288d24d4204SJose E. Roman 
289d24d4204SJose E. Roman   PetscFunctionBegin;
290792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
291d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
292d24d4204SJose E. Roman   PetscFunctionReturn(0);
293d24d4204SJose E. Roman }
294d24d4204SJose E. Roman 
2959371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
296d24d4204SJose E. Roman   PetscFunctionBegin;
2979566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2989566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
2999566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
300d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
301d24d4204SJose E. Roman   PetscFunctionReturn(0);
302d24d4204SJose E. Roman }
303d24d4204SJose E. Roman 
3049371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
305d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
306d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
307d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
308d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
309d24d4204SJose E. Roman   PetscBLASInt   one = 1;
310d24d4204SJose E. Roman 
311d24d4204SJose E. Roman   PetscFunctionBegin;
312792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
313d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
314d24d4204SJose E. Roman   PetscFunctionReturn(0);
315d24d4204SJose E. Roman }
316d24d4204SJose E. Roman 
3179371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
318d24d4204SJose E. Roman   PetscFunctionBegin;
3199566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
3209566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
3219566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
322d24d4204SJose E. Roman   PetscFunctionReturn(0);
323d24d4204SJose E. Roman }
324d24d4204SJose E. Roman 
325d24d4204SJose E. Roman /* --------------------------------------- */
3269371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) {
327d24d4204SJose E. Roman   PetscFunctionBegin;
328d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
329d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
330d24d4204SJose E. Roman   PetscFunctionReturn(0);
331d24d4204SJose E. Roman }
332d24d4204SJose E. Roman 
3339371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) {
334d24d4204SJose E. Roman   PetscFunctionBegin;
335d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
336d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
337d24d4204SJose E. Roman   PetscFunctionReturn(0);
338d24d4204SJose E. Roman }
339d24d4204SJose E. Roman 
3409371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) {
341d24d4204SJose E. Roman   Mat_Product *product = C->product;
342d24d4204SJose E. Roman 
343d24d4204SJose E. Roman   PetscFunctionBegin;
344d24d4204SJose E. Roman   switch (product->type) {
3459371c9d4SSatish Balay   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break;
3469371c9d4SSatish Balay   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break;
34798921bdaSJacob Faibussowitsch   default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
348d24d4204SJose E. Roman   }
349d24d4204SJose E. Roman   PetscFunctionReturn(0);
350d24d4204SJose E. Roman }
351d24d4204SJose E. Roman /* --------------------------------------- */
352d24d4204SJose E. Roman 
3539371c9d4SSatish Balay static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) {
354d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
355d24d4204SJose E. Roman   PetscScalar    *darray, *d2d, v;
356d24d4204SJose E. Roman   const PetscInt *ranges;
357d24d4204SJose E. Roman   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
358d24d4204SJose E. Roman 
359d24d4204SJose E. Roman   PetscFunctionBegin;
3609566063dSJacob Faibussowitsch   PetscCall(VecGetArray(D, &darray));
361d24d4204SJose E. Roman 
362d24d4204SJose E. Roman   if (A->rmap->N <= A->cmap->N) { /* row version */
363d24d4204SJose E. Roman 
364d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
3659566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
3669566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
367d24d4204SJose E. Roman     dlld = PetscMax(1, A->rmap->n);
368792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
369d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
370d24d4204SJose E. Roman 
371d24d4204SJose E. Roman     /* allocate 2d vector */
372d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
3739566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
374d24d4204SJose E. Roman     dlld = PetscMax(1, lszd);
375d24d4204SJose E. Roman 
376d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
377792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
378d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
379d24d4204SJose E. Roman 
380d24d4204SJose E. Roman     /* collect diagonal */
381d24d4204SJose E. Roman     for (j = 1; j <= a->M; j++) {
382792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
383792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
384d24d4204SJose E. Roman     }
385d24d4204SJose E. Roman 
386d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
387792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
3889566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
389d24d4204SJose E. Roman 
390d24d4204SJose E. Roman   } else { /* column version */
391d24d4204SJose E. Roman 
392d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
3939566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
3949566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
395d24d4204SJose E. Roman     dlld = 1;
396792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
397d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
398d24d4204SJose E. Roman 
399d24d4204SJose E. Roman     /* allocate 2d vector */
400d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4019566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
402d24d4204SJose E. Roman 
403d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
404792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
405d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
406d24d4204SJose E. Roman 
407d24d4204SJose E. Roman     /* collect diagonal */
408d24d4204SJose E. Roman     for (j = 1; j <= a->N; j++) {
409792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
410792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
411d24d4204SJose E. Roman     }
412d24d4204SJose E. Roman 
413d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
414792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
4159566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
416d24d4204SJose E. Roman   }
417d24d4204SJose E. Roman 
4189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(D, &darray));
4199566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(D));
4209566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(D));
421d24d4204SJose E. Roman   PetscFunctionReturn(0);
422d24d4204SJose E. Roman }
423d24d4204SJose E. Roman 
4249371c9d4SSatish Balay static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) {
425d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
426d24d4204SJose E. Roman   const PetscScalar *d;
427d24d4204SJose E. Roman   const PetscInt    *ranges;
428d24d4204SJose E. Roman   PetscScalar       *d2d;
429d24d4204SJose E. Roman   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
430d24d4204SJose E. Roman 
431d24d4204SJose E. Roman   PetscFunctionBegin;
432d24d4204SJose E. Roman   if (R) {
4339566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
434d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4359566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
4369566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
437d24d4204SJose E. Roman     dlld = 1;
438792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
439d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
440d24d4204SJose E. Roman 
441d24d4204SJose E. Roman     /* allocate 2d vector */
442d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4439566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
444d24d4204SJose E. Roman 
445d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
446792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
447d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
448d24d4204SJose E. Roman 
449d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
450792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
451d24d4204SJose E. Roman 
452d24d4204SJose E. Roman     /* broadcast along process columns */
453d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
454d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
455d24d4204SJose E. Roman 
456d24d4204SJose E. Roman     /* local scaling */
4579371c9d4SSatish Balay     for (j = 0; j < a->locc; j++)
4589371c9d4SSatish Balay       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
459d24d4204SJose E. Roman 
4609566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
4619566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
462d24d4204SJose E. Roman   }
463d24d4204SJose E. Roman   if (L) {
4649566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
465d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4669566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
4679566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
468d24d4204SJose E. Roman     dlld = PetscMax(1, A->rmap->n);
469792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
470d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
471d24d4204SJose E. Roman 
472d24d4204SJose E. Roman     /* allocate 2d vector */
473d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
4749566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
475d24d4204SJose E. Roman     dlld = PetscMax(1, lszd);
476d24d4204SJose E. Roman 
477d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
478792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
479d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
480d24d4204SJose E. Roman 
481d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
482792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
483d24d4204SJose E. Roman 
484d24d4204SJose E. Roman     /* broadcast along process rows */
485d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
486d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
487d24d4204SJose E. Roman 
488d24d4204SJose E. Roman     /* local scaling */
4899371c9d4SSatish Balay     for (i = 0; i < a->locr; i++)
4909371c9d4SSatish Balay       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
491d24d4204SJose E. Roman 
4929566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
4939566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
494d24d4204SJose E. Roman   }
495d24d4204SJose E. Roman   PetscFunctionReturn(0);
496d24d4204SJose E. Roman }
497d24d4204SJose E. Roman 
4989371c9d4SSatish Balay static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) {
499d24d4204SJose E. Roman   PetscFunctionBegin;
500d24d4204SJose E. Roman   *missing = PETSC_FALSE;
501d24d4204SJose E. Roman   PetscFunctionReturn(0);
502d24d4204SJose E. Roman }
503d24d4204SJose E. Roman 
5049371c9d4SSatish Balay static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) {
505d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
506d24d4204SJose E. Roman   PetscBLASInt   n, one = 1;
507d24d4204SJose E. Roman 
508d24d4204SJose E. Roman   PetscFunctionBegin;
509d24d4204SJose E. Roman   n = x->lld * x->locc;
510792fecdfSBarry Smith   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
511d24d4204SJose E. Roman   PetscFunctionReturn(0);
512d24d4204SJose E. Roman }
513d24d4204SJose E. Roman 
5149371c9d4SSatish Balay static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) {
515d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
516d24d4204SJose E. Roman   PetscBLASInt   i, n;
517d24d4204SJose E. Roman   PetscScalar    v;
518d24d4204SJose E. Roman 
519d24d4204SJose E. Roman   PetscFunctionBegin;
520d24d4204SJose E. Roman   n = PetscMin(x->M, x->N);
521d24d4204SJose E. Roman   for (i = 1; i <= n; i++) {
522792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
523d24d4204SJose E. Roman     v += alpha;
524792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
525d24d4204SJose E. Roman   }
526d24d4204SJose E. Roman   PetscFunctionReturn(0);
527d24d4204SJose E. Roman }
528d24d4204SJose E. Roman 
5299371c9d4SSatish Balay static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
530d24d4204SJose E. Roman   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
531d24d4204SJose E. Roman   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
532d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
533d24d4204SJose E. Roman   PetscScalar    beta = 1.0;
534d24d4204SJose E. Roman 
535d24d4204SJose E. Roman   PetscFunctionBegin;
536d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
537792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
5389566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
539d24d4204SJose E. Roman   PetscFunctionReturn(0);
540d24d4204SJose E. Roman }
541d24d4204SJose E. Roman 
5429371c9d4SSatish Balay static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) {
543d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
544d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
545d24d4204SJose E. Roman 
546d24d4204SJose E. Roman   PetscFunctionBegin;
5479566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
5489566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
549d24d4204SJose E. Roman   PetscFunctionReturn(0);
550d24d4204SJose E. Roman }
551d24d4204SJose E. Roman 
5529371c9d4SSatish Balay static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) {
553d24d4204SJose E. Roman   Mat            Bs;
554d24d4204SJose E. Roman   MPI_Comm       comm;
555d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
556d24d4204SJose E. Roman 
557d24d4204SJose E. Roman   PetscFunctionBegin;
5589566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5599566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Bs));
5609566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
5619566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bs, MATSCALAPACK));
562d24d4204SJose E. Roman   b       = (Mat_ScaLAPACK *)Bs->data;
563d24d4204SJose E. Roman   b->M    = a->M;
564d24d4204SJose E. Roman   b->N    = a->N;
565d24d4204SJose E. Roman   b->mb   = a->mb;
566d24d4204SJose E. Roman   b->nb   = a->nb;
567d24d4204SJose E. Roman   b->rsrc = a->rsrc;
568d24d4204SJose E. Roman   b->csrc = a->csrc;
5699566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Bs));
570d24d4204SJose E. Roman   *B = Bs;
571*48a46eb9SPierre Jolivet   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
572d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
573d24d4204SJose E. Roman   PetscFunctionReturn(0);
574d24d4204SJose E. Roman }
575d24d4204SJose E. Roman 
5769371c9d4SSatish Balay static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
577d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
578d24d4204SJose E. Roman   Mat            Bs   = *B;
579d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
580d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
581d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
582d24d4204SJose E. Roman   PetscInt i, j;
583d24d4204SJose E. Roman #endif
584d24d4204SJose E. Roman 
585d24d4204SJose E. Roman   PetscFunctionBegin;
5867fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
587d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
5889566063dSJacob Faibussowitsch     PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
589d24d4204SJose E. Roman     *B = Bs;
590d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
591d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)Bs->data;
592792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
593d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
594d24d4204SJose E. Roman   /* undo conjugation */
5959371c9d4SSatish Balay   for (i = 0; i < b->locr; i++)
5969371c9d4SSatish Balay     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
597d24d4204SJose E. Roman #endif
598d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
599d24d4204SJose E. Roman   PetscFunctionReturn(0);
600d24d4204SJose E. Roman }
601d24d4204SJose E. Roman 
6029371c9d4SSatish Balay static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) {
603d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
604d24d4204SJose E. Roman   PetscInt       i, j;
605d24d4204SJose E. Roman 
606d24d4204SJose E. Roman   PetscFunctionBegin;
6079371c9d4SSatish Balay   for (i = 0; i < a->locr; i++)
6089371c9d4SSatish Balay     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
609d24d4204SJose E. Roman   PetscFunctionReturn(0);
610d24d4204SJose E. Roman }
611d24d4204SJose E. Roman 
6129371c9d4SSatish Balay static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
613d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
614d24d4204SJose E. Roman   Mat            Bs   = *B;
615d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
616d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
617d24d4204SJose E. Roman 
618d24d4204SJose E. Roman   PetscFunctionBegin;
619d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
6209566063dSJacob Faibussowitsch     PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
621d24d4204SJose E. Roman     *B = Bs;
622d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
623d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)Bs->data;
624792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
625d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
626d24d4204SJose E. Roman   PetscFunctionReturn(0);
627d24d4204SJose E. Roman }
628d24d4204SJose E. Roman 
6299371c9d4SSatish Balay static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) {
630d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
631d24d4204SJose E. Roman   PetscScalar    *x, *x2d;
632d24d4204SJose E. Roman   const PetscInt *ranges;
633d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
634d24d4204SJose E. Roman 
635d24d4204SJose E. Roman   PetscFunctionBegin;
6369566063dSJacob Faibussowitsch   PetscCall(VecCopy(B, X));
6379566063dSJacob Faibussowitsch   PetscCall(VecGetArray(X, &x));
638d24d4204SJose E. Roman 
639d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
6409566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
6419566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
642d24d4204SJose E. Roman   xlld = PetscMax(1, A->rmap->n);
643792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
644d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
645d24d4204SJose E. Roman 
646d24d4204SJose E. Roman   /* allocate 2d vector */
647d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
6489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(lszx, &x2d));
649d24d4204SJose E. Roman   xlld = PetscMax(1, lszx);
650d24d4204SJose E. Roman 
651d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
652792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
653d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
654d24d4204SJose E. Roman 
655d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
656792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
657d24d4204SJose E. Roman 
658d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
659d24d4204SJose E. Roman   switch (A->factortype) {
660d24d4204SJose E. Roman   case MAT_FACTOR_LU:
661792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
662d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
663d24d4204SJose E. Roman     break;
664d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
665792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
666d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
667d24d4204SJose E. Roman     break;
6689371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
669d24d4204SJose E. Roman   }
670d24d4204SJose E. Roman 
671d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
672792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
673d24d4204SJose E. Roman 
6749566063dSJacob Faibussowitsch   PetscCall(PetscFree(x2d));
6759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(X, &x));
676d24d4204SJose E. Roman   PetscFunctionReturn(0);
677d24d4204SJose E. Roman }
678d24d4204SJose E. Roman 
6799371c9d4SSatish Balay static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) {
680d24d4204SJose E. Roman   PetscFunctionBegin;
6819566063dSJacob Faibussowitsch   PetscCall(MatSolve_ScaLAPACK(A, B, X));
6829566063dSJacob Faibussowitsch   PetscCall(VecAXPY(X, 1, Y));
683d24d4204SJose E. Roman   PetscFunctionReturn(0);
684d24d4204SJose E. Roman }
685d24d4204SJose E. Roman 
6869371c9d4SSatish Balay static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) {
687d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
688d24d4204SJose E. Roman   PetscBool      flg1, flg2;
689d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
690d24d4204SJose E. Roman 
691d24d4204SJose E. Roman   PetscFunctionBegin;
6929566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
6939566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
69408401ef6SPierre Jolivet   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
695d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B, 1, X, 2);
696d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)B->data;
697d24d4204SJose E. Roman   x = (Mat_ScaLAPACK *)X->data;
6989566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
699d24d4204SJose E. Roman 
700d24d4204SJose E. Roman   switch (A->factortype) {
701d24d4204SJose E. Roman   case MAT_FACTOR_LU:
702792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
703d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
704d24d4204SJose E. Roman     break;
705d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
706792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
707d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
708d24d4204SJose E. Roman     break;
7099371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
710d24d4204SJose E. Roman   }
711d24d4204SJose E. Roman   PetscFunctionReturn(0);
712d24d4204SJose E. Roman }
713d24d4204SJose E. Roman 
7149371c9d4SSatish Balay static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) {
715d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
716d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
717d24d4204SJose E. Roman 
718d24d4204SJose E. Roman   PetscFunctionBegin;
719d24d4204SJose E. Roman   if (!a->pivots) {
7209566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
7219566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)A, a->locr * sizeof(PetscBLASInt)));
722d24d4204SJose E. Roman   }
723792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
724d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf", info);
725d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
726d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
727d24d4204SJose E. Roman 
7289566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
7299566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
730d24d4204SJose E. Roman   PetscFunctionReturn(0);
731d24d4204SJose E. Roman }
732d24d4204SJose E. Roman 
7339371c9d4SSatish Balay static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
734d24d4204SJose E. Roman   PetscFunctionBegin;
7359566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
7369566063dSJacob Faibussowitsch   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
737d24d4204SJose E. Roman   PetscFunctionReturn(0);
738d24d4204SJose E. Roman }
739d24d4204SJose E. Roman 
7409371c9d4SSatish Balay static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
741d24d4204SJose E. Roman   PetscFunctionBegin;
742d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
743d24d4204SJose E. Roman   PetscFunctionReturn(0);
744d24d4204SJose E. Roman }
745d24d4204SJose E. Roman 
7469371c9d4SSatish Balay static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) {
747d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
748d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
749d24d4204SJose E. Roman 
750d24d4204SJose E. Roman   PetscFunctionBegin;
751792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
752d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf", info);
753d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
754d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
755d24d4204SJose E. Roman 
7569566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
7579566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
758d24d4204SJose E. Roman   PetscFunctionReturn(0);
759d24d4204SJose E. Roman }
760d24d4204SJose E. Roman 
7619371c9d4SSatish Balay static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
762d24d4204SJose E. Roman   PetscFunctionBegin;
7639566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
7649566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
765d24d4204SJose E. Roman   PetscFunctionReturn(0);
766d24d4204SJose E. Roman }
767d24d4204SJose E. Roman 
7689371c9d4SSatish Balay static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
769d24d4204SJose E. Roman   PetscFunctionBegin;
770d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
771d24d4204SJose E. Roman   PetscFunctionReturn(0);
772d24d4204SJose E. Roman }
773d24d4204SJose E. Roman 
7749371c9d4SSatish Balay PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) {
775d24d4204SJose E. Roman   PetscFunctionBegin;
776d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
777d24d4204SJose E. Roman   PetscFunctionReturn(0);
778d24d4204SJose E. Roman }
779d24d4204SJose E. Roman 
7809371c9d4SSatish Balay static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) {
781d24d4204SJose E. Roman   Mat            B;
78259172f18SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
783d24d4204SJose E. Roman 
784d24d4204SJose E. Roman   PetscFunctionBegin;
785d24d4204SJose E. Roman   /* Create the factorization matrix */
7869566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
78766e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
788d24d4204SJose E. Roman   B->factortype      = ftype;
7899566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
7909566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
791d24d4204SJose E. Roman 
7929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
793d24d4204SJose E. Roman   *F = B;
794d24d4204SJose E. Roman   PetscFunctionReturn(0);
795d24d4204SJose E. Roman }
796d24d4204SJose E. Roman 
7979371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) {
798d24d4204SJose E. Roman   PetscFunctionBegin;
7999566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
8009566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
801d24d4204SJose E. Roman   PetscFunctionReturn(0);
802d24d4204SJose E. Roman }
803d24d4204SJose E. Roman 
8049371c9d4SSatish Balay static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) {
805d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
806d24d4204SJose E. Roman   PetscBLASInt   one = 1, lwork = 0;
807d24d4204SJose E. Roman   const char    *ntype;
808d68f4f38SPierre Jolivet   PetscScalar   *work = NULL, dummy;
809d24d4204SJose E. Roman 
810d24d4204SJose E. Roman   PetscFunctionBegin;
811d24d4204SJose E. Roman   switch (type) {
812d24d4204SJose E. Roman   case NORM_1:
813d24d4204SJose E. Roman     ntype = "1";
814d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
815d24d4204SJose E. Roman     break;
816d24d4204SJose E. Roman   case NORM_FROBENIUS:
817d24d4204SJose E. Roman     ntype = "F";
818d24d4204SJose E. Roman     work  = &dummy;
819d24d4204SJose E. Roman     break;
820d24d4204SJose E. Roman   case NORM_INFINITY:
821d24d4204SJose E. Roman     ntype = "I";
822d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
823d24d4204SJose E. Roman     break;
8249371c9d4SSatish Balay   default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
825d24d4204SJose E. Roman   }
8269566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
827d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
8289566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscFree(work));
829d24d4204SJose E. Roman   PetscFunctionReturn(0);
830d24d4204SJose E. Roman }
831d24d4204SJose E. Roman 
8329371c9d4SSatish Balay static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) {
833d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
834d24d4204SJose E. Roman 
835d24d4204SJose E. Roman   PetscFunctionBegin;
8369566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
837d24d4204SJose E. Roman   PetscFunctionReturn(0);
838d24d4204SJose E. Roman }
839d24d4204SJose E. Roman 
8409371c9d4SSatish Balay static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) {
841d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
842d24d4204SJose E. Roman   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
843d24d4204SJose E. Roman 
844d24d4204SJose E. Roman   PetscFunctionBegin;
845d24d4204SJose E. Roman   if (rows) {
846d24d4204SJose E. Roman     n     = a->locr;
847d24d4204SJose E. Roman     nb    = a->mb;
848d24d4204SJose E. Roman     isrc  = a->rsrc;
849d24d4204SJose E. Roman     nproc = a->grid->nprow;
850d24d4204SJose E. Roman     iproc = a->grid->myrow;
8519566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
852d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
8539566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
854d24d4204SJose E. Roman   }
855d24d4204SJose E. Roman   if (cols) {
856d24d4204SJose E. Roman     n     = a->locc;
857d24d4204SJose E. Roman     nb    = a->nb;
858d24d4204SJose E. Roman     isrc  = a->csrc;
859d24d4204SJose E. Roman     nproc = a->grid->npcol;
860d24d4204SJose E. Roman     iproc = a->grid->mycol;
8619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
862d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
8639566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
864d24d4204SJose E. Roman   }
865d24d4204SJose E. Roman   PetscFunctionReturn(0);
866d24d4204SJose E. Roman }
867d24d4204SJose E. Roman 
8689371c9d4SSatish Balay static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
869d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
870d24d4204SJose E. Roman   Mat                Bmpi;
871d24d4204SJose E. Roman   MPI_Comm           comm;
8724b1a79daSJose E. Roman   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
8734b1a79daSJose E. Roman   const PetscInt    *ranges, *branges, *cwork;
8744b1a79daSJose E. Roman   const PetscScalar *vwork;
875d24d4204SJose E. Roman   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
876d24d4204SJose E. Roman   PetscScalar       *barray;
8774b1a79daSJose E. Roman   PetscBool          differ = PETSC_FALSE;
8784b1a79daSJose E. Roman   PetscMPIInt        size;
879d24d4204SJose E. Roman 
880d24d4204SJose E. Roman   PetscFunctionBegin;
8819566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
8829566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
8834b1a79daSJose E. Roman 
8844b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
8859566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
8869566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
8879371c9d4SSatish Balay     for (i = 0; i < size; i++)
8889371c9d4SSatish Balay       if (ranges[i + 1] != branges[i + 1]) {
8899371c9d4SSatish Balay         differ = PETSC_TRUE;
8909371c9d4SSatish Balay         break;
8919371c9d4SSatish Balay       }
8924b1a79daSJose E. Roman   }
8934b1a79daSJose E. Roman 
8944b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
8959566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
8964b1a79daSJose E. Roman     m = PETSC_DECIDE;
8979566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
8984b1a79daSJose E. Roman     n = PETSC_DECIDE;
8999566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
9009566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
9019566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATDENSE));
9029566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
9034b1a79daSJose E. Roman 
9044b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9059566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
9064b1a79daSJose E. Roman     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
907792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
9084b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
9094b1a79daSJose E. Roman 
9104b1a79daSJose E. Roman     /* redistribute matrix */
9119566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
912792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
9139566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
9149566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
9159566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
9164b1a79daSJose E. Roman 
9174b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
9189566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
9194b1a79daSJose E. Roman     for (i = rstart; i < rend; i++) {
9209566063dSJacob Faibussowitsch       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
9219566063dSJacob Faibussowitsch       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
9229566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
9234b1a79daSJose E. Roman     }
9249566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
9259566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
9269566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Bmpi));
9274b1a79daSJose E. Roman 
9284b1a79daSJose E. Roman   } else { /* normal cases */
929d24d4204SJose E. Roman 
930d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
931d24d4204SJose E. Roman     else {
9329566063dSJacob Faibussowitsch       PetscCall(MatCreate(comm, &Bmpi));
93392c846b4SJose E. Roman       m = PETSC_DECIDE;
9349566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
93592c846b4SJose E. Roman       n = PETSC_DECIDE;
9369566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
9379566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
9389566063dSJacob Faibussowitsch       PetscCall(MatSetType(Bmpi, MATDENSE));
9399566063dSJacob Faibussowitsch       PetscCall(MatSetUp(Bmpi));
940d24d4204SJose E. Roman     }
941d24d4204SJose E. Roman 
942d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9439566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
944d24d4204SJose E. Roman     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
945792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
946d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
947d24d4204SJose E. Roman 
948d24d4204SJose E. Roman     /* redistribute matrix */
9499566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
950792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
9519566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
952d24d4204SJose E. Roman 
9539566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
9549566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
955d24d4204SJose E. Roman     if (reuse == MAT_INPLACE_MATRIX) {
9569566063dSJacob Faibussowitsch       PetscCall(MatHeaderReplace(A, &Bmpi));
957d24d4204SJose E. Roman     } else *B = Bmpi;
9584b1a79daSJose E. Roman   }
959d24d4204SJose E. Roman   PetscFunctionReturn(0);
960d24d4204SJose E. Roman }
961d24d4204SJose E. Roman 
9629371c9d4SSatish Balay static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) {
963b12397e7SPierre Jolivet   const PetscInt *ranges;
964b12397e7SPierre Jolivet   PetscMPIInt     size;
965b12397e7SPierre Jolivet   PetscInt        i, n;
966b12397e7SPierre Jolivet 
967b12397e7SPierre Jolivet   PetscFunctionBegin;
968b12397e7SPierre Jolivet   *correct = PETSC_TRUE;
969b12397e7SPierre Jolivet   PetscCallMPI(MPI_Comm_size(map->comm, &size));
970b12397e7SPierre Jolivet   if (size > 1) {
971b12397e7SPierre Jolivet     PetscCall(PetscLayoutGetRanges(map, &ranges));
972b12397e7SPierre Jolivet     n = ranges[1] - ranges[0];
9739371c9d4SSatish Balay     for (i = 1; i < size; i++)
9749371c9d4SSatish Balay       if (ranges[i + 1] - ranges[i] != n) break;
975b12397e7SPierre Jolivet     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
976b12397e7SPierre Jolivet   }
977b12397e7SPierre Jolivet   PetscFunctionReturn(0);
978b12397e7SPierre Jolivet }
979b12397e7SPierre Jolivet 
9809371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
981d24d4204SJose E. Roman   Mat_ScaLAPACK  *b;
982d24d4204SJose E. Roman   Mat             Bmpi;
983d24d4204SJose E. Roman   MPI_Comm        comm;
98492c846b4SJose E. Roman   PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
985b12397e7SPierre Jolivet   const PetscInt *ranges, *rows, *cols;
986d24d4204SJose E. Roman   PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
987d24d4204SJose E. Roman   PetscScalar    *aarray;
988b12397e7SPierre Jolivet   IS              ir, ic;
9894e8b52a1SJose E. Roman   PetscInt        lda;
990b12397e7SPierre Jolivet   PetscBool       flg;
991d24d4204SJose E. Roman 
992d24d4204SJose E. Roman   PetscFunctionBegin;
9939566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
994d24d4204SJose E. Roman 
995d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
996d24d4204SJose E. Roman   else {
9979566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
99892c846b4SJose E. Roman     m = PETSC_DECIDE;
9999566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
100092c846b4SJose E. Roman     n = PETSC_DECIDE;
10019566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10029566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10039566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
10049566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
1005d24d4204SJose E. Roman   }
1006d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)Bmpi->data;
1007d24d4204SJose E. Roman 
1008b12397e7SPierre Jolivet   PetscCall(MatDenseGetLDA(A, &lda));
1009b12397e7SPierre Jolivet   PetscCall(MatDenseGetArray(A, &aarray));
1010b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1011b12397e7SPierre Jolivet   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1012b12397e7SPierre Jolivet   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1013d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for A (1d block distribution) */
10149566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
10159566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
10164e8b52a1SJose E. Roman     lld = PetscMax(lda, 1);                       /* local leading dimension */
1017792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1018d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1019d24d4204SJose E. Roman 
1020d24d4204SJose E. Roman     /* redistribute matrix */
1021792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1022b12397e7SPierre Jolivet     Bmpi->nooffprocentries = PETSC_TRUE;
1023b12397e7SPierre Jolivet   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1024b12397e7SPierre Jolivet     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1025b12397e7SPierre Jolivet     b->roworiented = PETSC_FALSE;
1026b12397e7SPierre Jolivet     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1027b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ir, &rows));
1028b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ic, &cols));
1029b12397e7SPierre Jolivet     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1030b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ir, &rows));
1031b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ic, &cols));
1032b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ic));
1033b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ir));
1034b12397e7SPierre Jolivet   }
10359566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A, &aarray));
10369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10379566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1038d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
10399566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &Bmpi));
1040d24d4204SJose E. Roman   } else *B = Bmpi;
1041d24d4204SJose E. Roman   PetscFunctionReturn(0);
1042d24d4204SJose E. Roman }
1043d24d4204SJose E. Roman 
10449371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1045d24d4204SJose E. Roman   Mat                mat_scal;
1046d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1047d24d4204SJose E. Roman   const PetscInt    *cols;
1048d24d4204SJose E. Roman   const PetscScalar *vals;
1049d24d4204SJose E. Roman 
1050d24d4204SJose E. Roman   PetscFunctionBegin;
1051d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1052d24d4204SJose E. Roman     mat_scal = *newmat;
10539566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1054d24d4204SJose E. Roman   } else {
10559566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1056d24d4204SJose E. Roman     m = PETSC_DECIDE;
10579566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1058d24d4204SJose E. Roman     n = PETSC_DECIDE;
10599566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
10609566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
10619566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
10629566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1063d24d4204SJose E. Roman   }
1064d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
10659566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
10669566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
10679566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1068d24d4204SJose E. Roman   }
10699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
10709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1071d24d4204SJose E. Roman 
10729566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1073d24d4204SJose E. Roman   else *newmat = mat_scal;
1074d24d4204SJose E. Roman   PetscFunctionReturn(0);
1075d24d4204SJose E. Roman }
1076d24d4204SJose E. Roman 
10779371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1078d24d4204SJose E. Roman   Mat                mat_scal;
1079d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1080d24d4204SJose E. Roman   const PetscInt    *cols;
1081d24d4204SJose E. Roman   const PetscScalar *vals;
1082d24d4204SJose E. Roman   PetscScalar        v;
1083d24d4204SJose E. Roman 
1084d24d4204SJose E. Roman   PetscFunctionBegin;
1085d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1086d24d4204SJose E. Roman     mat_scal = *newmat;
10879566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1088d24d4204SJose E. Roman   } else {
10899566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1090d24d4204SJose E. Roman     m = PETSC_DECIDE;
10919566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1092d24d4204SJose E. Roman     n = PETSC_DECIDE;
10939566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
10949566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
10959566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
10969566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1097d24d4204SJose E. Roman   }
10989566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
1099d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
11009566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11019566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1102d24d4204SJose E. Roman     for (j = 0; j < ncols; j++) { /* lower triangular part */
1103d24d4204SJose E. Roman       if (cols[j] == row) continue;
1104b94d7dedSBarry Smith       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
11059566063dSJacob Faibussowitsch       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1106d24d4204SJose E. Roman     }
11079566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1108d24d4204SJose E. Roman   }
11099566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
11109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
11119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1112d24d4204SJose E. Roman 
11139566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1114d24d4204SJose E. Roman   else *newmat = mat_scal;
1115d24d4204SJose E. Roman   PetscFunctionReturn(0);
1116d24d4204SJose E. Roman }
1117d24d4204SJose E. Roman 
11189371c9d4SSatish Balay static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) {
1119d24d4204SJose E. Roman   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1120d24d4204SJose E. Roman   PetscInt       sz = 0;
1121d24d4204SJose E. Roman 
1122d24d4204SJose E. Roman   PetscFunctionBegin;
11239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
11249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1125d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1126d24d4204SJose E. Roman 
11279566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
11289566063dSJacob Faibussowitsch   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
11299566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(sz, &a->loc));
11309566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)A, sz * sizeof(PetscScalar)));
1131d24d4204SJose E. Roman 
1132d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
1133d24d4204SJose E. Roman   PetscFunctionReturn(0);
1134d24d4204SJose E. Roman }
1135d24d4204SJose E. Roman 
11369371c9d4SSatish Balay static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) {
1137d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1138d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1139d24d4204SJose E. Roman   PetscBool           flg;
1140d24d4204SJose E. Roman   MPI_Comm            icomm;
1141d24d4204SJose E. Roman 
1142d24d4204SJose E. Roman   PetscFunctionBegin;
11439566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&A->stash));
11449566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
11459566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->pivots));
11469566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
11479566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1148d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1149d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1150d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1151d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
11529566063dSJacob Faibussowitsch     PetscCall(PetscFree(grid));
11539566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1154d24d4204SJose E. Roman   }
11559566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
11569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
11579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
11589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
11599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
11609566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1161d24d4204SJose E. Roman   PetscFunctionReturn(0);
1162d24d4204SJose E. Roman }
1163d24d4204SJose E. Roman 
11649371c9d4SSatish Balay PetscErrorCode MatSetUp_ScaLAPACK(Mat A) {
1165d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1166d24d4204SJose E. Roman   PetscBLASInt   info = 0;
1167b12397e7SPierre Jolivet   PetscBool      flg;
1168d24d4204SJose E. Roman 
1169d24d4204SJose E. Roman   PetscFunctionBegin;
11709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
11719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1172d24d4204SJose E. Roman 
1173b12397e7SPierre Jolivet   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1174b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1175b12397e7SPierre Jolivet   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1176b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1177b12397e7SPierre Jolivet   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1178d24d4204SJose E. Roman 
1179d24d4204SJose E. Roman   /* compute local sizes */
11809566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
11819566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1182d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1183d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1184d24d4204SJose E. Roman   a->lld  = PetscMax(1, a->locr);
1185d24d4204SJose E. Roman 
1186d24d4204SJose E. Roman   /* allocate local array */
11879566063dSJacob Faibussowitsch   PetscCall(MatScaLAPACKSetPreallocation(A));
1188d24d4204SJose E. Roman 
1189d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1190792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1191d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
1192d24d4204SJose E. Roman   PetscFunctionReturn(0);
1193d24d4204SJose E. Roman }
1194d24d4204SJose E. Roman 
11959371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) {
1196d24d4204SJose E. Roman   PetscInt nstash, reallocs;
1197d24d4204SJose E. Roman 
1198d24d4204SJose E. Roman   PetscFunctionBegin;
1199d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
12009566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
12019566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
12029566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1203d24d4204SJose E. Roman   PetscFunctionReturn(0);
1204d24d4204SJose E. Roman }
1205d24d4204SJose E. Roman 
12069371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) {
1207d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1208d24d4204SJose E. Roman   PetscMPIInt    n;
1209d24d4204SJose E. Roman   PetscInt       i, flg, *row, *col;
1210d24d4204SJose E. Roman   PetscScalar   *val;
1211d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1212d24d4204SJose E. Roman 
1213d24d4204SJose E. Roman   PetscFunctionBegin;
1214d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1215d24d4204SJose E. Roman   while (1) {
12169566063dSJacob Faibussowitsch     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1217d24d4204SJose E. Roman     if (!flg) break;
1218d24d4204SJose E. Roman     for (i = 0; i < n; i++) {
12199566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
12209566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1221792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1222aed4548fSBarry Smith       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1223d24d4204SJose E. Roman       switch (A->insertmode) {
1224d24d4204SJose E. Roman       case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break;
1225d24d4204SJose E. Roman       case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break;
122698921bdaSJacob Faibussowitsch       default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1227d24d4204SJose E. Roman       }
1228d24d4204SJose E. Roman     }
1229d24d4204SJose E. Roman   }
12309566063dSJacob Faibussowitsch   PetscCall(MatStashScatterEnd_Private(&A->stash));
1231d24d4204SJose E. Roman   PetscFunctionReturn(0);
1232d24d4204SJose E. Roman }
1233d24d4204SJose E. Roman 
12349371c9d4SSatish Balay PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) {
1235d24d4204SJose E. Roman   Mat      Adense, As;
1236d24d4204SJose E. Roman   MPI_Comm comm;
1237d24d4204SJose E. Roman 
1238d24d4204SJose E. Roman   PetscFunctionBegin;
12399566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
12409566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Adense));
12419566063dSJacob Faibussowitsch   PetscCall(MatSetType(Adense, MATDENSE));
12429566063dSJacob Faibussowitsch   PetscCall(MatLoad(Adense, viewer));
12439566063dSJacob Faibussowitsch   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
12449566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
12459566063dSJacob Faibussowitsch   PetscCall(MatHeaderReplace(newMat, &As));
1246d24d4204SJose E. Roman   PetscFunctionReturn(0);
1247d24d4204SJose E. Roman }
1248d24d4204SJose E. Roman 
1249d24d4204SJose E. Roman /* -------------------------------------------------------------------*/
12509371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1251d24d4204SJose E. Roman                                        0,
1252d24d4204SJose E. Roman                                        0,
1253d24d4204SJose E. Roman                                        MatMult_ScaLAPACK,
1254d24d4204SJose E. Roman                                        /* 4*/ MatMultAdd_ScaLAPACK,
1255d24d4204SJose E. Roman                                        MatMultTranspose_ScaLAPACK,
1256d24d4204SJose E. Roman                                        MatMultTransposeAdd_ScaLAPACK,
1257d24d4204SJose E. Roman                                        MatSolve_ScaLAPACK,
1258d24d4204SJose E. Roman                                        MatSolveAdd_ScaLAPACK,
1259d24d4204SJose E. Roman                                        0,
1260d24d4204SJose E. Roman                                        /*10*/ 0,
1261d24d4204SJose E. Roman                                        MatLUFactor_ScaLAPACK,
1262d24d4204SJose E. Roman                                        MatCholeskyFactor_ScaLAPACK,
1263d24d4204SJose E. Roman                                        0,
1264d24d4204SJose E. Roman                                        MatTranspose_ScaLAPACK,
1265d24d4204SJose E. Roman                                        /*15*/ MatGetInfo_ScaLAPACK,
1266d24d4204SJose E. Roman                                        0,
1267d24d4204SJose E. Roman                                        MatGetDiagonal_ScaLAPACK,
1268d24d4204SJose E. Roman                                        MatDiagonalScale_ScaLAPACK,
1269d24d4204SJose E. Roman                                        MatNorm_ScaLAPACK,
1270d24d4204SJose E. Roman                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1271d24d4204SJose E. Roman                                        MatAssemblyEnd_ScaLAPACK,
1272d24d4204SJose E. Roman                                        MatSetOption_ScaLAPACK,
1273d24d4204SJose E. Roman                                        MatZeroEntries_ScaLAPACK,
1274d24d4204SJose E. Roman                                        /*24*/ 0,
1275d24d4204SJose E. Roman                                        MatLUFactorSymbolic_ScaLAPACK,
1276d24d4204SJose E. Roman                                        MatLUFactorNumeric_ScaLAPACK,
1277d24d4204SJose E. Roman                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1278d24d4204SJose E. Roman                                        MatCholeskyFactorNumeric_ScaLAPACK,
1279d24d4204SJose E. Roman                                        /*29*/ MatSetUp_ScaLAPACK,
1280d24d4204SJose E. Roman                                        0,
1281d24d4204SJose E. Roman                                        0,
1282d24d4204SJose E. Roman                                        0,
1283d24d4204SJose E. Roman                                        0,
1284d24d4204SJose E. Roman                                        /*34*/ MatDuplicate_ScaLAPACK,
1285d24d4204SJose E. Roman                                        0,
1286d24d4204SJose E. Roman                                        0,
1287d24d4204SJose E. Roman                                        0,
1288d24d4204SJose E. Roman                                        0,
1289d24d4204SJose E. Roman                                        /*39*/ MatAXPY_ScaLAPACK,
1290d24d4204SJose E. Roman                                        0,
1291d24d4204SJose E. Roman                                        0,
1292d24d4204SJose E. Roman                                        0,
1293d24d4204SJose E. Roman                                        MatCopy_ScaLAPACK,
1294d24d4204SJose E. Roman                                        /*44*/ 0,
1295d24d4204SJose E. Roman                                        MatScale_ScaLAPACK,
1296d24d4204SJose E. Roman                                        MatShift_ScaLAPACK,
1297d24d4204SJose E. Roman                                        0,
1298d24d4204SJose E. Roman                                        0,
1299d24d4204SJose E. Roman                                        /*49*/ 0,
1300d24d4204SJose E. Roman                                        0,
1301d24d4204SJose E. Roman                                        0,
1302d24d4204SJose E. Roman                                        0,
1303d24d4204SJose E. Roman                                        0,
1304d24d4204SJose E. Roman                                        /*54*/ 0,
1305d24d4204SJose E. Roman                                        0,
1306d24d4204SJose E. Roman                                        0,
1307d24d4204SJose E. Roman                                        0,
1308d24d4204SJose E. Roman                                        0,
1309d24d4204SJose E. Roman                                        /*59*/ 0,
1310d24d4204SJose E. Roman                                        MatDestroy_ScaLAPACK,
1311d24d4204SJose E. Roman                                        MatView_ScaLAPACK,
1312d24d4204SJose E. Roman                                        0,
1313d24d4204SJose E. Roman                                        0,
1314d24d4204SJose E. Roman                                        /*64*/ 0,
1315d24d4204SJose E. Roman                                        0,
1316d24d4204SJose E. Roman                                        0,
1317d24d4204SJose E. Roman                                        0,
1318d24d4204SJose E. Roman                                        0,
1319d24d4204SJose E. Roman                                        /*69*/ 0,
1320d24d4204SJose E. Roman                                        0,
1321d24d4204SJose E. Roman                                        MatConvert_ScaLAPACK_Dense,
1322d24d4204SJose E. Roman                                        0,
1323d24d4204SJose E. Roman                                        0,
1324d24d4204SJose E. Roman                                        /*74*/ 0,
1325d24d4204SJose E. Roman                                        0,
1326d24d4204SJose E. Roman                                        0,
1327d24d4204SJose E. Roman                                        0,
1328d24d4204SJose E. Roman                                        0,
1329d24d4204SJose E. Roman                                        /*79*/ 0,
1330d24d4204SJose E. Roman                                        0,
1331d24d4204SJose E. Roman                                        0,
1332d24d4204SJose E. Roman                                        0,
1333d24d4204SJose E. Roman                                        MatLoad_ScaLAPACK,
1334d24d4204SJose E. Roman                                        /*84*/ 0,
1335d24d4204SJose E. Roman                                        0,
1336d24d4204SJose E. Roman                                        0,
1337d24d4204SJose E. Roman                                        0,
1338d24d4204SJose E. Roman                                        0,
1339d24d4204SJose E. Roman                                        /*89*/ 0,
1340d24d4204SJose E. Roman                                        0,
1341d24d4204SJose E. Roman                                        MatMatMultNumeric_ScaLAPACK,
1342d24d4204SJose E. Roman                                        0,
1343d24d4204SJose E. Roman                                        0,
1344d24d4204SJose E. Roman                                        /*94*/ 0,
1345d24d4204SJose E. Roman                                        0,
1346d24d4204SJose E. Roman                                        0,
1347d24d4204SJose E. Roman                                        MatMatTransposeMultNumeric_ScaLAPACK,
1348d24d4204SJose E. Roman                                        0,
1349d24d4204SJose E. Roman                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1350d24d4204SJose E. Roman                                        0,
1351d24d4204SJose E. Roman                                        0,
1352d24d4204SJose E. Roman                                        MatConjugate_ScaLAPACK,
1353d24d4204SJose E. Roman                                        0,
1354d24d4204SJose E. Roman                                        /*104*/ 0,
1355d24d4204SJose E. Roman                                        0,
1356d24d4204SJose E. Roman                                        0,
1357d24d4204SJose E. Roman                                        0,
1358d24d4204SJose E. Roman                                        0,
1359d24d4204SJose E. Roman                                        /*109*/ MatMatSolve_ScaLAPACK,
1360d24d4204SJose E. Roman                                        0,
1361d24d4204SJose E. Roman                                        0,
1362d24d4204SJose E. Roman                                        0,
1363d24d4204SJose E. Roman                                        MatMissingDiagonal_ScaLAPACK,
1364d24d4204SJose E. Roman                                        /*114*/ 0,
1365d24d4204SJose E. Roman                                        0,
1366d24d4204SJose E. Roman                                        0,
1367d24d4204SJose E. Roman                                        0,
1368d24d4204SJose E. Roman                                        0,
1369d24d4204SJose E. Roman                                        /*119*/ 0,
1370d24d4204SJose E. Roman                                        MatHermitianTranspose_ScaLAPACK,
1371d24d4204SJose E. Roman                                        0,
1372d24d4204SJose E. Roman                                        0,
1373d24d4204SJose E. Roman                                        0,
1374d24d4204SJose E. Roman                                        /*124*/ 0,
1375d24d4204SJose E. Roman                                        0,
1376d24d4204SJose E. Roman                                        0,
1377d24d4204SJose E. Roman                                        0,
1378d24d4204SJose E. Roman                                        0,
1379d24d4204SJose E. Roman                                        /*129*/ 0,
1380d24d4204SJose E. Roman                                        0,
1381d24d4204SJose E. Roman                                        0,
1382d24d4204SJose E. Roman                                        0,
1383d24d4204SJose E. Roman                                        0,
1384d24d4204SJose E. Roman                                        /*134*/ 0,
1385d24d4204SJose E. Roman                                        0,
1386d24d4204SJose E. Roman                                        0,
1387d24d4204SJose E. Roman                                        0,
1388d24d4204SJose E. Roman                                        0,
1389d24d4204SJose E. Roman                                        0,
1390d24d4204SJose E. Roman                                        /*140*/ 0,
1391d24d4204SJose E. Roman                                        0,
1392d24d4204SJose E. Roman                                        0,
1393d24d4204SJose E. Roman                                        0,
1394d24d4204SJose E. Roman                                        0,
1395d24d4204SJose E. Roman                                        /*145*/ 0,
1396d24d4204SJose E. Roman                                        0,
139799a7f59eSMark Adams                                        0,
139899a7f59eSMark Adams                                        0,
13997fb60732SBarry Smith                                        0,
14009371c9d4SSatish Balay                                        /*150*/ 0};
1401d24d4204SJose E. Roman 
14029371c9d4SSatish Balay static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) {
1403d24d4204SJose E. Roman   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1404d24d4204SJose E. Roman   PetscInt           size = stash->size, nsends;
1405d24d4204SJose E. Roman   PetscInt           count, *sindices, **rindices, i, j, l;
1406d24d4204SJose E. Roman   PetscScalar      **rvalues, *svalues;
1407d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1408d24d4204SJose E. Roman   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1409d24d4204SJose E. Roman   PetscMPIInt       *sizes, *nlengths, nreceives;
1410d24d4204SJose E. Roman   PetscInt          *sp_idx, *sp_idy;
1411d24d4204SJose E. Roman   PetscScalar       *sp_val;
1412d24d4204SJose E. Roman   PetscMatStashSpace space, space_next;
1413d24d4204SJose E. Roman   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1414d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1415d24d4204SJose E. Roman 
1416d24d4204SJose E. Roman   PetscFunctionBegin;
1417d24d4204SJose E. Roman   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1418d24d4204SJose E. Roman     InsertMode addv;
14191c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
142008401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1421d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1422d24d4204SJose E. Roman   }
1423d24d4204SJose E. Roman 
1424d24d4204SJose E. Roman   bs2 = stash->bs * stash->bs;
1425d24d4204SJose E. Roman 
1426d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
14279566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
14289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1429d24d4204SJose E. Roman 
1430d24d4204SJose E. Roman   i = j = 0;
1431d24d4204SJose E. Roman   space = stash->space_head;
1432d24d4204SJose E. Roman   while (space) {
1433d24d4204SJose E. Roman     space_next = space->next;
1434d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
14359566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
14369566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1437792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1438d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
14399371c9d4SSatish Balay       nlengths[j]++;
14409371c9d4SSatish Balay       owner[i] = j;
1441d24d4204SJose E. Roman       i++;
1442d24d4204SJose E. Roman     }
1443d24d4204SJose E. Roman     space = space_next;
1444d24d4204SJose E. Roman   }
1445d24d4204SJose E. Roman 
1446d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
14479566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
1448d24d4204SJose E. Roman   for (i = 0, nsends = 0; i < size; i++) {
1449d24d4204SJose E. Roman     if (nlengths[i]) {
14509371c9d4SSatish Balay       sizes[i] = 1;
14519371c9d4SSatish Balay       nsends++;
1452d24d4204SJose E. Roman     }
1453d24d4204SJose E. Roman   }
1454d24d4204SJose E. Roman 
14559371c9d4SSatish Balay   {
14569371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
1457d24d4204SJose E. Roman     /* Determine the number of messages to expect, their lengths, from from-ids */
14589566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
14599566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1460d24d4204SJose E. Roman     /* since clubbing row,col - lengths are multiplied by 2 */
1461d24d4204SJose E. Roman     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
14629566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1463d24d4204SJose E. Roman     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1464d24d4204SJose E. Roman     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
14659566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
14669566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
14679371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
14689371c9d4SSatish Balay   }
1469d24d4204SJose E. Roman 
1470d24d4204SJose E. Roman   /* do sends:
1471d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1472d24d4204SJose E. Roman          the ith processor
1473d24d4204SJose E. Roman   */
14749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
14759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
14769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1477d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
14789371c9d4SSatish Balay   startv[0] = 0;
14799371c9d4SSatish Balay   starti[0] = 0;
1480d24d4204SJose E. Roman   for (i = 1; i < size; i++) {
1481d24d4204SJose E. Roman     startv[i] = startv[i - 1] + nlengths[i - 1];
1482d24d4204SJose E. Roman     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1483d24d4204SJose E. Roman   }
1484d24d4204SJose E. Roman 
1485d24d4204SJose E. Roman   i     = 0;
1486d24d4204SJose E. Roman   space = stash->space_head;
1487d24d4204SJose E. Roman   while (space) {
1488d24d4204SJose E. Roman     space_next = space->next;
1489d24d4204SJose E. Roman     sp_idx     = space->idx;
1490d24d4204SJose E. Roman     sp_idy     = space->idy;
1491d24d4204SJose E. Roman     sp_val     = space->val;
1492d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
1493d24d4204SJose E. Roman       j = owner[i];
1494d24d4204SJose E. Roman       if (bs2 == 1) {
1495d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1496d24d4204SJose E. Roman       } else {
1497d24d4204SJose E. Roman         PetscInt     k;
1498d24d4204SJose E. Roman         PetscScalar *buf1, *buf2;
1499d24d4204SJose E. Roman         buf1 = svalues + bs2 * startv[j];
1500d24d4204SJose E. Roman         buf2 = space->val + bs2 * l;
1501d24d4204SJose E. Roman         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1502d24d4204SJose E. Roman       }
1503d24d4204SJose E. Roman       sindices[starti[j]]               = sp_idx[l];
1504d24d4204SJose E. Roman       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1505d24d4204SJose E. Roman       startv[j]++;
1506d24d4204SJose E. Roman       starti[j]++;
1507d24d4204SJose E. Roman       i++;
1508d24d4204SJose E. Roman     }
1509d24d4204SJose E. Roman     space = space_next;
1510d24d4204SJose E. Roman   }
1511d24d4204SJose E. Roman   startv[0] = 0;
1512d24d4204SJose E. Roman   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1513d24d4204SJose E. Roman 
1514d24d4204SJose E. Roman   for (i = 0, count = 0; i < size; i++) {
1515d24d4204SJose E. Roman     if (sizes[i]) {
15169566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
15179566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1518d24d4204SJose E. Roman     }
1519d24d4204SJose E. Roman   }
1520d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
15219566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1522d24d4204SJose E. Roman   for (i = 0; i < size; i++) {
1523*48a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1524d24d4204SJose E. Roman   }
1525d24d4204SJose E. Roman #endif
15269566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
15279566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
15289566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
15299566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
1530d24d4204SJose E. Roman 
1531d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
15329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1533d24d4204SJose E. Roman 
1534d24d4204SJose E. Roman   for (i = 0; i < nreceives; i++) {
1535d24d4204SJose E. Roman     recv_waits[2 * i]     = recv_waits1[i];
1536d24d4204SJose E. Roman     recv_waits[2 * i + 1] = recv_waits2[i];
1537d24d4204SJose E. Roman   }
1538d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1539d24d4204SJose E. Roman 
15409566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
15419566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
1542d24d4204SJose E. Roman 
1543d24d4204SJose E. Roman   stash->svalues         = svalues;
1544d24d4204SJose E. Roman   stash->sindices        = sindices;
1545d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1546d24d4204SJose E. Roman   stash->rindices        = rindices;
1547d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1548d24d4204SJose E. Roman   stash->nsends          = nsends;
1549d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1550d24d4204SJose E. Roman   stash->reproduce_count = 0;
1551d24d4204SJose E. Roman   PetscFunctionReturn(0);
1552d24d4204SJose E. Roman }
1553d24d4204SJose E. Roman 
15549371c9d4SSatish Balay static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) {
1555d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1556d24d4204SJose E. Roman 
1557d24d4204SJose E. Roman   PetscFunctionBegin;
155828b400f6SJacob Faibussowitsch   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1559aed4548fSBarry Smith   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1560aed4548fSBarry Smith   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
15619566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
15629566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1563d24d4204SJose E. Roman   PetscFunctionReturn(0);
1564d24d4204SJose E. Roman }
1565d24d4204SJose E. Roman 
1566d24d4204SJose E. Roman /*@
15676aad120cSJose E. Roman    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1568d24d4204SJose E. Roman    the ScaLAPACK matrix
1569d24d4204SJose E. Roman 
1570d24d4204SJose E. Roman    Logically Collective on A
1571d24d4204SJose E. Roman 
1572d8d19677SJose E. Roman    Input Parameters:
1573d24d4204SJose E. Roman +  A  - a MATSCALAPACK matrix
1574d24d4204SJose E. Roman .  mb - the row block size
1575d24d4204SJose E. Roman -  nb - the column block size
1576d24d4204SJose E. Roman 
1577d24d4204SJose E. Roman    Level: intermediate
1578d24d4204SJose E. Roman 
1579db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1580d24d4204SJose E. Roman @*/
15819371c9d4SSatish Balay PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) {
1582d24d4204SJose E. Roman   PetscFunctionBegin;
1583d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1584d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, mb, 2);
1585d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, nb, 3);
1586cac4c232SBarry Smith   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1587d24d4204SJose E. Roman   PetscFunctionReturn(0);
1588d24d4204SJose E. Roman }
1589d24d4204SJose E. Roman 
15909371c9d4SSatish Balay static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) {
1591d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1592d24d4204SJose E. Roman 
1593d24d4204SJose E. Roman   PetscFunctionBegin;
1594d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1595d24d4204SJose E. Roman   if (nb) *nb = a->nb;
1596d24d4204SJose E. Roman   PetscFunctionReturn(0);
1597d24d4204SJose E. Roman }
1598d24d4204SJose E. Roman 
1599d24d4204SJose E. Roman /*@
16006aad120cSJose E. Roman    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1601d24d4204SJose E. Roman    the ScaLAPACK matrix
1602d24d4204SJose E. Roman 
1603d24d4204SJose E. Roman    Not collective
1604d24d4204SJose E. Roman 
1605d24d4204SJose E. Roman    Input Parameter:
1606d24d4204SJose E. Roman .  A  - a MATSCALAPACK matrix
1607d24d4204SJose E. Roman 
1608d24d4204SJose E. Roman    Output Parameters:
1609d24d4204SJose E. Roman +  mb - the row block size
1610d24d4204SJose E. Roman -  nb - the column block size
1611d24d4204SJose E. Roman 
1612d24d4204SJose E. Roman    Level: intermediate
1613d24d4204SJose E. Roman 
1614db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1615d24d4204SJose E. Roman @*/
16169371c9d4SSatish Balay PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) {
1617d24d4204SJose E. Roman   PetscFunctionBegin;
1618d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1619cac4c232SBarry Smith   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1620d24d4204SJose E. Roman   PetscFunctionReturn(0);
1621d24d4204SJose E. Roman }
1622d24d4204SJose E. Roman 
1623d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1624d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1625d24d4204SJose E. Roman 
1626d24d4204SJose E. Roman /*MC
1627d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1628d24d4204SJose E. Roman 
1629d24d4204SJose E. Roman    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1630d24d4204SJose E. Roman 
1631d24d4204SJose E. Roman    Options Database Keys:
1632d24d4204SJose E. Roman +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
163389bba20eSBarry Smith .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1634d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1635d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1636d24d4204SJose E. Roman 
163789bba20eSBarry Smith   Note:
163889bba20eSBarry Smith    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
163989bba20eSBarry Smith    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
164089bba20eSBarry Smith    the given rank.
164189bba20eSBarry Smith 
1642d24d4204SJose E. Roman    Level: beginner
1643d24d4204SJose E. Roman 
164489bba20eSBarry Smith .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1645d24d4204SJose E. Roman M*/
1646d24d4204SJose E. Roman 
16479371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) {
1648d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1649d24d4204SJose E. Roman   PetscBool           flg, flg1;
1650d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1651d24d4204SJose E. Roman   MPI_Comm            icomm;
1652d24d4204SJose E. Roman   PetscBLASInt        nprow, npcol, myrow, mycol;
1653d24d4204SJose E. Roman   PetscInt            optv1, k = 2, array[2] = {0, 0};
1654d24d4204SJose E. Roman   PetscMPIInt         size;
1655d24d4204SJose E. Roman 
1656d24d4204SJose E. Roman   PetscFunctionBegin;
16579566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
1658d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1659d24d4204SJose E. Roman 
16609566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1661d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1662d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1663d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1664d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1665d24d4204SJose E. Roman 
16669566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A, &a));
1667d24d4204SJose E. Roman   A->data = (void *)a;
1668d24d4204SJose E. Roman 
1669d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1670d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
16719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
16729566063dSJacob Faibussowitsch     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
16739566063dSJacob Faibussowitsch     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1674d24d4204SJose E. Roman   }
16759566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
16769566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1677d24d4204SJose E. Roman   if (!flg) {
16789566063dSJacob Faibussowitsch     PetscCall(PetscNewLog(A, &grid));
1679d24d4204SJose E. Roman 
16809566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(icomm, &size));
1681d24d4204SJose E. Roman     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1682d24d4204SJose E. Roman 
1683d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
16849566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1685d24d4204SJose E. Roman     if (flg1) {
168608401ef6SPierre Jolivet       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1687d24d4204SJose E. Roman       grid->nprow = optv1;
1688d24d4204SJose E. Roman     }
1689d0609cedSBarry Smith     PetscOptionsEnd();
1690d24d4204SJose E. Roman 
1691d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1692d24d4204SJose E. Roman     grid->npcol = size / grid->nprow;
16939566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
16949566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1695f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1696d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1697d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1698d24d4204SJose E. Roman     grid->grid_refct = 1;
1699d24d4204SJose E. Roman     grid->nprow      = nprow;
1700d24d4204SJose E. Roman     grid->npcol      = npcol;
1701d24d4204SJose E. Roman     grid->myrow      = myrow;
1702d24d4204SJose E. Roman     grid->mycol      = mycol;
1703d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1704f7ec113fSDamian Marek     grid->ictxrow    = Csys2blacs_handle(icomm);
1705d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1706f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1707d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
17089566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1709d24d4204SJose E. Roman 
1710d24d4204SJose E. Roman   } else grid->grid_refct++;
17119566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
1712d24d4204SJose E. Roman   a->grid = grid;
1713d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1714d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1715d24d4204SJose E. Roman 
1716d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
17179566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1718d24d4204SJose E. Roman   if (flg) {
1719d24d4204SJose E. Roman     a->mb = array[0];
1720d24d4204SJose E. Roman     a->nb = (k > 1) ? array[1] : a->mb;
1721d24d4204SJose E. Roman   }
1722d0609cedSBarry Smith   PetscOptionsEnd();
1723d24d4204SJose E. Roman 
1724b12397e7SPierre Jolivet   a->roworiented = PETSC_TRUE;
17259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
17269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
17279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
17289566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1729d24d4204SJose E. Roman   PetscFunctionReturn(0);
1730d24d4204SJose E. Roman }
1731d24d4204SJose E. Roman 
1732d24d4204SJose E. Roman /*@C
1733d24d4204SJose E. Roman    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1734d24d4204SJose E. Roman    (2D block cyclic distribution).
1735d24d4204SJose E. Roman 
1736d24d4204SJose E. Roman    Collective
1737d24d4204SJose E. Roman 
1738d24d4204SJose E. Roman    Input Parameters:
1739d24d4204SJose E. Roman +  comm - MPI communicator
1740d24d4204SJose E. Roman .  mb   - row block size (or PETSC_DECIDE to have it set)
1741d24d4204SJose E. Roman .  nb   - column block size (or PETSC_DECIDE to have it set)
1742d24d4204SJose E. Roman .  M    - number of global rows
1743d24d4204SJose E. Roman .  N    - number of global columns
1744d24d4204SJose E. Roman .  rsrc - coordinate of process that owns the first row of the distributed matrix
1745d24d4204SJose E. Roman -  csrc - coordinate of process that owns the first column of the distributed matrix
1746d24d4204SJose E. Roman 
1747d24d4204SJose E. Roman    Output Parameter:
1748d24d4204SJose E. Roman .  A - the matrix
1749d24d4204SJose E. Roman 
1750d24d4204SJose E. Roman    Options Database Keys:
1751d24d4204SJose E. Roman .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1752d24d4204SJose E. Roman 
1753d24d4204SJose E. Roman    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1754d24d4204SJose E. Roman    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1755d24d4204SJose E. Roman    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1756d24d4204SJose E. Roman 
1757d24d4204SJose E. Roman    Notes:
1758d24d4204SJose E. Roman    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1759d24d4204SJose E. Roman    is chosen.
1760d24d4204SJose E. Roman 
1761d24d4204SJose E. Roman    Storage Information:
1762d24d4204SJose E. Roman    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1763d24d4204SJose E. Roman    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1764d24d4204SJose E. Roman    significance and are thus ignored. The block sizes refer to the values
1765d24d4204SJose E. Roman    used for the distributed matrix, not the same meaning as in BAIJ.
1766d24d4204SJose E. Roman 
1767d24d4204SJose E. Roman    Level: intermediate
1768d24d4204SJose E. Roman 
1769db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1770d24d4204SJose E. Roman @*/
17719371c9d4SSatish Balay PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) {
1772d24d4204SJose E. Roman   Mat_ScaLAPACK *a;
1773d24d4204SJose E. Roman   PetscInt       m, n;
1774d24d4204SJose E. Roman 
1775d24d4204SJose E. Roman   PetscFunctionBegin;
17769566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
17779566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSCALAPACK));
1778aed4548fSBarry Smith   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1779d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1780d24d4204SJose E. Roman   m = PETSC_DECIDE;
17819566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1782d24d4204SJose E. Roman   n = PETSC_DECIDE;
17839566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
17849566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1785d24d4204SJose E. Roman   a = (Mat_ScaLAPACK *)(*A)->data;
17869566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(M, &a->M));
17879566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(N, &a->N));
17889566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
17899566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
17909566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
17919566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
17929566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*A));
1793d24d4204SJose E. Roman   PetscFunctionReturn(0);
1794d24d4204SJose E. Roman }
1795