xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 57168dbe2b4331ffbf7c9254e2e952baf9ea37a0)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet "       TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet "       PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet "       ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet "       YEAR = 1997\n"
1127e75052SPierre Jolivet "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE;
1327e75052SPierre Jolivet 
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman 
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman 
22f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23f7ec113fSDamian Marek {
24f7ec113fSDamian Marek   PetscFunctionBegin;
259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
269566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27f7ec113fSDamian Marek   PetscFunctionReturn(0);
28f7ec113fSDamian Marek }
29f7ec113fSDamian Marek 
30d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
31d24d4204SJose E. Roman {
32d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
33d24d4204SJose E. Roman   PetscBool         iascii;
34d24d4204SJose E. Roman   PetscViewerFormat format;
35d24d4204SJose E. Roman   Mat               Adense;
36d24d4204SJose E. Roman 
37d24d4204SJose E. Roman   PetscFunctionBegin;
389566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
39d24d4204SJose E. Roman   if (iascii) {
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer,&format));
41d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
429566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
439566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
449566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
459566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
46d24d4204SJose E. Roman       PetscFunctionReturn(0);
47d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48d24d4204SJose E. Roman       PetscFunctionReturn(0);
49d24d4204SJose E. Roman     }
50d24d4204SJose E. Roman   }
51d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
529566063dSJacob Faibussowitsch   PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
539566063dSJacob Faibussowitsch   PetscCall(MatView(Adense,viewer));
549566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
55d24d4204SJose E. Roman   PetscFunctionReturn(0);
56d24d4204SJose E. Roman }
57d24d4204SJose E. Roman 
58d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59d24d4204SJose E. Roman {
60d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61d24d4204SJose E. Roman   PetscLogDouble isend[2],irecv[2];
62d24d4204SJose E. Roman 
63d24d4204SJose E. Roman   PetscFunctionBegin;
64d24d4204SJose E. Roman   info->block_size = 1.0;
65d24d4204SJose E. Roman 
66d24d4204SJose E. Roman   isend[0] = a->lld*a->locc;     /* locally allocated */
67d24d4204SJose E. Roman   isend[1] = a->locr*a->locc;    /* used submatrix */
68d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69d24d4204SJose E. Roman     info->nz_allocated   = isend[0];
70d24d4204SJose E. Roman     info->nz_used        = isend[1];
71d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
72*57168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
73d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
74d24d4204SJose E. Roman     info->nz_used        = irecv[1];
75d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
76*57168dbeSPierre Jolivet     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
77d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
78d24d4204SJose E. Roman     info->nz_used        = irecv[1];
79d24d4204SJose E. Roman   }
80d24d4204SJose E. Roman 
81d24d4204SJose E. Roman   info->nz_unneeded       = 0;
82d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
83d24d4204SJose E. Roman   info->mallocs           = 0;
84d24d4204SJose E. Roman   info->memory            = ((PetscObject)A)->mem;
85d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
86d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
87d24d4204SJose E. Roman   info->factor_mallocs    = 0;
88d24d4204SJose E. Roman   PetscFunctionReturn(0);
89d24d4204SJose E. Roman }
90d24d4204SJose E. Roman 
91d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92d24d4204SJose E. Roman {
93b12397e7SPierre Jolivet   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
94b12397e7SPierre Jolivet 
95d24d4204SJose E. Roman   PetscFunctionBegin;
96d24d4204SJose E. Roman   switch (op) {
97d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATIONS:
98d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATION_ERR:
99d24d4204SJose E. Roman     case MAT_NEW_NONZERO_ALLOCATION_ERR:
100d24d4204SJose E. Roman     case MAT_SYMMETRIC:
101d24d4204SJose E. Roman     case MAT_SORTED_FULL:
102d24d4204SJose E. Roman     case MAT_HERMITIAN:
103d24d4204SJose E. Roman       break;
104b12397e7SPierre Jolivet     case MAT_ROW_ORIENTED:
105b12397e7SPierre Jolivet       a->roworiented = flg;
106b12397e7SPierre Jolivet       break;
107d24d4204SJose E. Roman     default:
10898921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
109d24d4204SJose E. Roman   }
110d24d4204SJose E. Roman   PetscFunctionReturn(0);
111d24d4204SJose E. Roman }
112d24d4204SJose E. Roman 
113d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
114d24d4204SJose E. Roman {
115d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
116d24d4204SJose E. Roman   PetscInt       i,j;
117d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
118b12397e7SPierre Jolivet   PetscBool      roworiented = a->roworiented;
119d24d4204SJose E. Roman 
120d24d4204SJose E. Roman   PetscFunctionBegin;
121b12397e7SPierre Jolivet   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
122d24d4204SJose E. Roman   for (i=0;i<nr;i++) {
123d24d4204SJose E. Roman     if (rows[i] < 0) continue;
1249566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(rows[i]+1,&gridx));
125d24d4204SJose E. Roman     for (j=0;j<nc;j++) {
126d24d4204SJose E. Roman       if (cols[j] < 0) continue;
1279566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx));
128792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
129d24d4204SJose E. Roman       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
130b12397e7SPierre Jolivet         if (roworiented) {
131d24d4204SJose E. Roman           switch (imode) {
132d24d4204SJose E. Roman             case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
133b12397e7SPierre Jolivet             default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
134b12397e7SPierre Jolivet           }
135b12397e7SPierre Jolivet         } else {
136b12397e7SPierre Jolivet           switch (imode) {
137b12397e7SPierre Jolivet             case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break;
138b12397e7SPierre Jolivet             default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break;
139b12397e7SPierre Jolivet           }
140d24d4204SJose E. Roman         }
141d24d4204SJose E. Roman       } else {
14228b400f6SJacob Faibussowitsch         PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
143d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
144b12397e7SPierre Jolivet         PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES)));
145d24d4204SJose E. Roman       }
146d24d4204SJose E. Roman     }
147d24d4204SJose E. Roman   }
148d24d4204SJose E. Roman   PetscFunctionReturn(0);
149d24d4204SJose E. Roman }
150d24d4204SJose E. Roman 
151d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
152d24d4204SJose E. Roman {
153d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
154d24d4204SJose E. Roman   PetscScalar    *x2d,*y2d,alpha=1.0;
155d24d4204SJose E. Roman   const PetscInt *ranges;
156d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
157d24d4204SJose E. Roman 
158d24d4204SJose E. Roman   PetscFunctionBegin;
159d24d4204SJose E. Roman   if (transpose) {
160d24d4204SJose E. Roman 
161d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1629566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
1639566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
164d24d4204SJose E. Roman     xlld = PetscMax(1,A->rmap->n);
165792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
166d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
1679566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
1689566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
169d24d4204SJose E. Roman     ylld = 1;
170792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
171d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
172d24d4204SJose E. Roman 
173d24d4204SJose E. Roman     /* allocate 2d vectors */
174d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
175d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
177d24d4204SJose E. Roman     xlld = PetscMax(1,lszx);
178d24d4204SJose E. Roman 
179d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
180792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
181d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
182792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
183d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
184d24d4204SJose E. Roman 
185d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
186792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
187d24d4204SJose E. Roman 
188d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
189792fecdfSBarry Smith     if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
190d24d4204SJose E. Roman 
191d24d4204SJose E. Roman     /* call PBLAS subroutine */
192792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
193d24d4204SJose E. Roman 
194d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
195792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
196d24d4204SJose E. Roman 
197d24d4204SJose E. Roman   } else {   /* non-transpose */
198d24d4204SJose E. Roman 
199d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
2009566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
2019566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
202d24d4204SJose E. Roman     xlld = 1;
203792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
204d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
2059566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
2069566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
207d24d4204SJose E. Roman     ylld = PetscMax(1,A->rmap->n);
208792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
209d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
210d24d4204SJose E. Roman 
211d24d4204SJose E. Roman     /* allocate 2d vectors */
212d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
213d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
2149566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
215d24d4204SJose E. Roman     ylld = PetscMax(1,lszy);
216d24d4204SJose E. Roman 
217d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
218792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
219d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
220792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
221d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
222d24d4204SJose E. Roman 
223d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
224792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
225d24d4204SJose E. Roman 
226d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
227792fecdfSBarry Smith     if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
228d24d4204SJose E. Roman 
229d24d4204SJose E. Roman     /* call PBLAS subroutine */
230792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
231d24d4204SJose E. Roman 
232d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
233792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
234d24d4204SJose E. Roman 
235d24d4204SJose E. Roman   }
2369566063dSJacob Faibussowitsch   PetscCall(PetscFree2(x2d,y2d));
237d24d4204SJose E. Roman   PetscFunctionReturn(0);
238d24d4204SJose E. Roman }
239d24d4204SJose E. Roman 
240d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
241d24d4204SJose E. Roman {
242d24d4204SJose E. Roman   const PetscScalar *xarray;
243d24d4204SJose E. Roman   PetscScalar       *yarray;
244d24d4204SJose E. Roman 
245d24d4204SJose E. Roman   PetscFunctionBegin;
2469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&xarray));
2479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&yarray));
2489566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
2499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&xarray));
2509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&yarray));
251d24d4204SJose E. Roman   PetscFunctionReturn(0);
252d24d4204SJose E. Roman }
253d24d4204SJose E. Roman 
254d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
255d24d4204SJose E. Roman {
256d24d4204SJose E. Roman   const PetscScalar *xarray;
257d24d4204SJose E. Roman   PetscScalar       *yarray;
258d24d4204SJose E. Roman 
259d24d4204SJose E. Roman   PetscFunctionBegin;
2609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&xarray));
2619566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&yarray));
2629566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
2639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&xarray));
2649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&yarray));
265d24d4204SJose E. Roman   PetscFunctionReturn(0);
266d24d4204SJose E. Roman }
267d24d4204SJose E. Roman 
268d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269d24d4204SJose E. Roman {
270d24d4204SJose E. Roman   const PetscScalar *xarray;
271d24d4204SJose E. Roman   PetscScalar       *zarray;
272d24d4204SJose E. Roman 
273d24d4204SJose E. Roman   PetscFunctionBegin;
2749566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y,z));
2759566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&xarray));
2769566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z,&zarray));
2779566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
2789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&xarray));
2799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z,&zarray));
280d24d4204SJose E. Roman   PetscFunctionReturn(0);
281d24d4204SJose E. Roman }
282d24d4204SJose E. Roman 
283d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
284d24d4204SJose E. Roman {
285d24d4204SJose E. Roman   const PetscScalar *xarray;
286d24d4204SJose E. Roman   PetscScalar       *zarray;
287d24d4204SJose E. Roman 
288d24d4204SJose E. Roman   PetscFunctionBegin;
2899566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y,z));
2909566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&xarray));
2919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z,&zarray));
2929566063dSJacob Faibussowitsch   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
2939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&xarray));
2949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z,&zarray));
295d24d4204SJose E. Roman   PetscFunctionReturn(0);
296d24d4204SJose E. Roman }
297d24d4204SJose E. Roman 
298d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
299d24d4204SJose E. Roman {
300d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
301d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
302d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
303d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
304d24d4204SJose E. Roman   PetscBLASInt  one=1;
305d24d4204SJose E. Roman 
306d24d4204SJose E. Roman   PetscFunctionBegin;
307792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
308d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
309d24d4204SJose E. Roman   PetscFunctionReturn(0);
310d24d4204SJose E. Roman }
311d24d4204SJose E. Roman 
312d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
313d24d4204SJose E. Roman {
314d24d4204SJose E. Roman   PetscFunctionBegin;
3159566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
3169566063dSJacob Faibussowitsch   PetscCall(MatSetType(C,MATSCALAPACK));
3179566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
318d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
319d24d4204SJose E. Roman   PetscFunctionReturn(0);
320d24d4204SJose E. Roman }
321d24d4204SJose E. Roman 
322d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
323d24d4204SJose E. Roman {
324d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
325d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
326d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
327d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
328d24d4204SJose E. Roman   PetscBLASInt  one=1;
329d24d4204SJose E. Roman 
330d24d4204SJose E. Roman   PetscFunctionBegin;
331792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
332d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
333d24d4204SJose E. Roman   PetscFunctionReturn(0);
334d24d4204SJose E. Roman }
335d24d4204SJose E. Roman 
336d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
337d24d4204SJose E. Roman {
338d24d4204SJose E. Roman   PetscFunctionBegin;
3399566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
3409566063dSJacob Faibussowitsch   PetscCall(MatSetType(C,MATSCALAPACK));
3419566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
342d24d4204SJose E. Roman   PetscFunctionReturn(0);
343d24d4204SJose E. Roman }
344d24d4204SJose E. Roman 
345d24d4204SJose E. Roman /* --------------------------------------- */
346d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
347d24d4204SJose E. Roman {
348d24d4204SJose E. Roman   PetscFunctionBegin;
349d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
350d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
351d24d4204SJose E. Roman   PetscFunctionReturn(0);
352d24d4204SJose E. Roman }
353d24d4204SJose E. Roman 
354d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
355d24d4204SJose E. Roman {
356d24d4204SJose E. Roman   PetscFunctionBegin;
357d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
358d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
359d24d4204SJose E. Roman   PetscFunctionReturn(0);
360d24d4204SJose E. Roman }
361d24d4204SJose E. Roman 
362d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
363d24d4204SJose E. Roman {
364d24d4204SJose E. Roman   Mat_Product    *product = C->product;
365d24d4204SJose E. Roman 
366d24d4204SJose E. Roman   PetscFunctionBegin;
367d24d4204SJose E. Roman   switch (product->type) {
368d24d4204SJose E. Roman     case MATPRODUCT_AB:
3699566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
370d24d4204SJose E. Roman       break;
371d24d4204SJose E. Roman     case MATPRODUCT_ABt:
3729566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
373d24d4204SJose E. Roman       break;
37498921bdaSJacob Faibussowitsch     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
375d24d4204SJose E. Roman   }
376d24d4204SJose E. Roman   PetscFunctionReturn(0);
377d24d4204SJose E. Roman }
378d24d4204SJose E. Roman /* --------------------------------------- */
379d24d4204SJose E. Roman 
380d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
381d24d4204SJose E. Roman {
382d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
383d24d4204SJose E. Roman   PetscScalar       *darray,*d2d,v;
384d24d4204SJose E. Roman   const PetscInt    *ranges;
385d24d4204SJose E. Roman   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
386d24d4204SJose E. Roman 
387d24d4204SJose E. Roman   PetscFunctionBegin;
3889566063dSJacob Faibussowitsch   PetscCall(VecGetArray(D,&darray));
389d24d4204SJose E. Roman 
390d24d4204SJose E. Roman   if (A->rmap->N<=A->cmap->N) {   /* row version */
391d24d4204SJose E. Roman 
392d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
3939566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
3949566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
395d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
396792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
397d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
398d24d4204SJose E. Roman 
399d24d4204SJose E. Roman     /* allocate 2d vector */
400d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
4019566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd,&d2d));
402d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
403d24d4204SJose E. Roman 
404d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
405792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
406d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
407d24d4204SJose E. Roman 
408d24d4204SJose E. Roman     /* collect diagonal */
409d24d4204SJose E. Roman     for (j=1;j<=a->M;j++) {
410792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
411792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
412d24d4204SJose E. Roman     }
413d24d4204SJose E. Roman 
414d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
415792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
4169566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
417d24d4204SJose E. Roman 
418d24d4204SJose E. Roman   } else {   /* column version */
419d24d4204SJose E. Roman 
420d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4219566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
4229566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
423d24d4204SJose E. Roman     dlld = 1;
424792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
425d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
426d24d4204SJose E. Roman 
427d24d4204SJose E. Roman     /* allocate 2d vector */
428d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
4299566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd,&d2d));
430d24d4204SJose E. Roman 
431d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
432792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
433d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
434d24d4204SJose E. Roman 
435d24d4204SJose E. Roman     /* collect diagonal */
436d24d4204SJose E. Roman     for (j=1;j<=a->N;j++) {
437792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
438792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
439d24d4204SJose E. Roman     }
440d24d4204SJose E. Roman 
441d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
442792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
4439566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
444d24d4204SJose E. Roman   }
445d24d4204SJose E. Roman 
4469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(D,&darray));
4479566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(D));
4489566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(D));
449d24d4204SJose E. Roman   PetscFunctionReturn(0);
450d24d4204SJose E. Roman }
451d24d4204SJose E. Roman 
452d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
453d24d4204SJose E. Roman {
454d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
455d24d4204SJose E. Roman   const PetscScalar *d;
456d24d4204SJose E. Roman   const PetscInt    *ranges;
457d24d4204SJose E. Roman   PetscScalar       *d2d;
458d24d4204SJose E. Roman   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
459d24d4204SJose E. Roman 
460d24d4204SJose E. Roman   PetscFunctionBegin;
461d24d4204SJose E. Roman   if (R) {
4629566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
463d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4649566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
4659566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
466d24d4204SJose E. Roman     dlld = 1;
467792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
468d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
469d24d4204SJose E. Roman 
470d24d4204SJose E. Roman     /* allocate 2d vector */
471d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
4729566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd,&d2d));
473d24d4204SJose E. Roman 
474d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
475792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
476d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
477d24d4204SJose E. Roman 
478d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
479792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
480d24d4204SJose E. Roman 
481d24d4204SJose E. Roman     /* broadcast along process columns */
482d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
483d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
484d24d4204SJose E. Roman 
485d24d4204SJose E. Roman     /* local scaling */
486d24d4204SJose E. Roman     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
487d24d4204SJose E. Roman 
4889566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
4899566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
490d24d4204SJose E. Roman   }
491d24d4204SJose E. Roman   if (L) {
4929566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
493d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4949566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
4959566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
496d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
497792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
498d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
499d24d4204SJose E. Roman 
500d24d4204SJose E. Roman     /* allocate 2d vector */
501d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
5029566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd,&d2d));
503d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
504d24d4204SJose E. Roman 
505d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
506792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
507d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
508d24d4204SJose E. Roman 
509d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
510792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
511d24d4204SJose E. Roman 
512d24d4204SJose E. Roman     /* broadcast along process rows */
513d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
514d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
515d24d4204SJose E. Roman 
516d24d4204SJose E. Roman     /* local scaling */
517d24d4204SJose E. Roman     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
518d24d4204SJose E. Roman 
5199566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
5209566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
521d24d4204SJose E. Roman   }
522d24d4204SJose E. Roman   PetscFunctionReturn(0);
523d24d4204SJose E. Roman }
524d24d4204SJose E. Roman 
525d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
526d24d4204SJose E. Roman {
527d24d4204SJose E. Roman   PetscFunctionBegin;
528d24d4204SJose E. Roman   *missing = PETSC_FALSE;
529d24d4204SJose E. Roman   PetscFunctionReturn(0);
530d24d4204SJose E. Roman }
531d24d4204SJose E. Roman 
532d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
533d24d4204SJose E. Roman {
534d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
535d24d4204SJose E. Roman   PetscBLASInt  n,one=1;
536d24d4204SJose E. Roman 
537d24d4204SJose E. Roman   PetscFunctionBegin;
538d24d4204SJose E. Roman   n = x->lld*x->locc;
539792fecdfSBarry Smith   PetscCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
540d24d4204SJose E. Roman   PetscFunctionReturn(0);
541d24d4204SJose E. Roman }
542d24d4204SJose E. Roman 
543d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
544d24d4204SJose E. Roman {
545d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
546d24d4204SJose E. Roman   PetscBLASInt  i,n;
547d24d4204SJose E. Roman   PetscScalar   v;
548d24d4204SJose E. Roman 
549d24d4204SJose E. Roman   PetscFunctionBegin;
550d24d4204SJose E. Roman   n = PetscMin(x->M,x->N);
551d24d4204SJose E. Roman   for (i=1;i<=n;i++) {
552792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
553d24d4204SJose E. Roman     v += alpha;
554792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
555d24d4204SJose E. Roman   }
556d24d4204SJose E. Roman   PetscFunctionReturn(0);
557d24d4204SJose E. Roman }
558d24d4204SJose E. Roman 
559d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
560d24d4204SJose E. Roman {
561d24d4204SJose E. Roman   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
562d24d4204SJose E. Roman   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
563d24d4204SJose E. Roman   PetscBLASInt   one=1;
564d24d4204SJose E. Roman   PetscScalar    beta=1.0;
565d24d4204SJose E. Roman 
566d24d4204SJose E. Roman   PetscFunctionBegin;
567d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y,1,X,3);
568792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
5699566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
570d24d4204SJose E. Roman   PetscFunctionReturn(0);
571d24d4204SJose E. Roman }
572d24d4204SJose E. Roman 
573d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
574d24d4204SJose E. Roman {
575d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
576d24d4204SJose E. Roman   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
577d24d4204SJose E. Roman 
578d24d4204SJose E. Roman   PetscFunctionBegin;
5799566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
5809566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
581d24d4204SJose E. Roman   PetscFunctionReturn(0);
582d24d4204SJose E. Roman }
583d24d4204SJose E. Roman 
584d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
585d24d4204SJose E. Roman {
586d24d4204SJose E. Roman   Mat            Bs;
587d24d4204SJose E. Roman   MPI_Comm       comm;
588d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
589d24d4204SJose E. Roman 
590d24d4204SJose E. Roman   PetscFunctionBegin;
5919566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5929566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,&Bs));
5939566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
5949566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bs,MATSCALAPACK));
595d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
596d24d4204SJose E. Roman   b->M    = a->M;
597d24d4204SJose E. Roman   b->N    = a->N;
598d24d4204SJose E. Roman   b->mb   = a->mb;
599d24d4204SJose E. Roman   b->nb   = a->nb;
600d24d4204SJose E. Roman   b->rsrc = a->rsrc;
601d24d4204SJose E. Roman   b->csrc = a->csrc;
6029566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Bs));
603d24d4204SJose E. Roman   *B = Bs;
604d24d4204SJose E. Roman   if (op == MAT_COPY_VALUES) {
6059566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
606d24d4204SJose E. Roman   }
607d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
608d24d4204SJose E. Roman   PetscFunctionReturn(0);
609d24d4204SJose E. Roman }
610d24d4204SJose E. Roman 
611d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
612d24d4204SJose E. Roman {
613d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
614d24d4204SJose E. Roman   Mat            Bs = *B;
615d24d4204SJose E. Roman   PetscBLASInt   one=1;
616d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
617d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
618d24d4204SJose E. Roman   PetscInt       i,j;
619d24d4204SJose E. Roman #endif
620d24d4204SJose E. Roman 
621d24d4204SJose E. Roman   PetscFunctionBegin;
6227fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
623d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
6249566063dSJacob Faibussowitsch     PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
625d24d4204SJose E. Roman     *B = Bs;
626d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
627d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
628792fecdfSBarry Smith   PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
629d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
630d24d4204SJose E. Roman   /* undo conjugation */
631d24d4204SJose E. Roman   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
632d24d4204SJose E. Roman #endif
633d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
634d24d4204SJose E. Roman   PetscFunctionReturn(0);
635d24d4204SJose E. Roman }
636d24d4204SJose E. Roman 
637d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
638d24d4204SJose E. Roman {
639d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
640d24d4204SJose E. Roman   PetscInt      i,j;
641d24d4204SJose E. Roman 
642d24d4204SJose E. Roman   PetscFunctionBegin;
643d24d4204SJose E. Roman   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
644d24d4204SJose E. Roman   PetscFunctionReturn(0);
645d24d4204SJose E. Roman }
646d24d4204SJose E. Roman 
647d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
648d24d4204SJose E. Roman {
649d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
650d24d4204SJose E. Roman   Mat            Bs = *B;
651d24d4204SJose E. Roman   PetscBLASInt   one=1;
652d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
653d24d4204SJose E. Roman 
654d24d4204SJose E. Roman   PetscFunctionBegin;
655d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
6569566063dSJacob Faibussowitsch     PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
657d24d4204SJose E. Roman     *B = Bs;
658d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
659d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
660792fecdfSBarry Smith   PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
661d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
662d24d4204SJose E. Roman   PetscFunctionReturn(0);
663d24d4204SJose E. Roman }
664d24d4204SJose E. Roman 
665d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
666d24d4204SJose E. Roman {
667d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
668d24d4204SJose E. Roman   PetscScalar    *x,*x2d;
669d24d4204SJose E. Roman   const PetscInt *ranges;
670d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
671d24d4204SJose E. Roman 
672d24d4204SJose E. Roman   PetscFunctionBegin;
6739566063dSJacob Faibussowitsch   PetscCall(VecCopy(B,X));
6749566063dSJacob Faibussowitsch   PetscCall(VecGetArray(X,&x));
675d24d4204SJose E. Roman 
676d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
6779566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
6789566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
679d24d4204SJose E. Roman   xlld = PetscMax(1,A->rmap->n);
680792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
681d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
682d24d4204SJose E. Roman 
683d24d4204SJose E. Roman   /* allocate 2d vector */
684d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
6859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(lszx,&x2d));
686d24d4204SJose E. Roman   xlld = PetscMax(1,lszx);
687d24d4204SJose E. Roman 
688d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
689792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
690d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
691d24d4204SJose E. Roman 
692d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
693792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
694d24d4204SJose E. Roman 
695d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
696d24d4204SJose E. Roman   switch (A->factortype) {
697d24d4204SJose E. Roman     case MAT_FACTOR_LU:
698792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
699d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
700d24d4204SJose E. Roman       break;
701d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
702792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
703d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
704d24d4204SJose E. Roman       break;
705d24d4204SJose E. Roman     default:
706d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
707d24d4204SJose E. Roman   }
708d24d4204SJose E. Roman 
709d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
710792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
711d24d4204SJose E. Roman 
7129566063dSJacob Faibussowitsch   PetscCall(PetscFree(x2d));
7139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(X,&x));
714d24d4204SJose E. Roman   PetscFunctionReturn(0);
715d24d4204SJose E. Roman }
716d24d4204SJose E. Roman 
717d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
718d24d4204SJose E. Roman {
719d24d4204SJose E. Roman   PetscFunctionBegin;
7209566063dSJacob Faibussowitsch   PetscCall(MatSolve_ScaLAPACK(A,B,X));
7219566063dSJacob Faibussowitsch   PetscCall(VecAXPY(X,1,Y));
722d24d4204SJose E. Roman   PetscFunctionReturn(0);
723d24d4204SJose E. Roman }
724d24d4204SJose E. Roman 
725d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
726d24d4204SJose E. Roman {
727d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
728d24d4204SJose E. Roman   PetscBool      flg1,flg2;
729d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
730d24d4204SJose E. Roman 
731d24d4204SJose E. Roman   PetscFunctionBegin;
7329566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
7339566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
73408401ef6SPierre Jolivet   PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
735d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B,1,X,2);
736d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)B->data;
737d24d4204SJose E. Roman   x = (Mat_ScaLAPACK*)X->data;
7389566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));
739d24d4204SJose E. Roman 
740d24d4204SJose E. Roman   switch (A->factortype) {
741d24d4204SJose E. Roman     case MAT_FACTOR_LU:
742792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
743d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
744d24d4204SJose E. Roman       break;
745d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
746792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
747d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
748d24d4204SJose E. Roman       break;
749d24d4204SJose E. Roman     default:
750d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
751d24d4204SJose E. Roman   }
752d24d4204SJose E. Roman   PetscFunctionReturn(0);
753d24d4204SJose E. Roman }
754d24d4204SJose E. Roman 
755d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
756d24d4204SJose E. Roman {
757d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
759d24d4204SJose E. Roman 
760d24d4204SJose E. Roman   PetscFunctionBegin;
761d24d4204SJose E. Roman   if (!a->pivots) {
7629566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots));
7639566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
764d24d4204SJose E. Roman   }
765792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf",info);
767d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
768d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
769d24d4204SJose E. Roman 
7709566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
7719566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
772d24d4204SJose E. Roman   PetscFunctionReturn(0);
773d24d4204SJose E. Roman }
774d24d4204SJose E. Roman 
775d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776d24d4204SJose E. Roman {
777d24d4204SJose E. Roman   PetscFunctionBegin;
7789566063dSJacob Faibussowitsch   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
7799566063dSJacob Faibussowitsch   PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info));
780d24d4204SJose E. Roman   PetscFunctionReturn(0);
781d24d4204SJose E. Roman }
782d24d4204SJose E. Roman 
783d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
784d24d4204SJose E. Roman {
785d24d4204SJose E. Roman   PetscFunctionBegin;
786d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
787d24d4204SJose E. Roman   PetscFunctionReturn(0);
788d24d4204SJose E. Roman }
789d24d4204SJose E. Roman 
790d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
791d24d4204SJose E. Roman {
792d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
793d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
794d24d4204SJose E. Roman 
795d24d4204SJose E. Roman   PetscFunctionBegin;
796792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
797d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf",info);
798d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
799d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
800d24d4204SJose E. Roman 
8019566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
8029566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
803d24d4204SJose E. Roman   PetscFunctionReturn(0);
804d24d4204SJose E. Roman }
805d24d4204SJose E. Roman 
806d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
807d24d4204SJose E. Roman {
808d24d4204SJose E. Roman   PetscFunctionBegin;
8099566063dSJacob Faibussowitsch   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
8109566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info));
811d24d4204SJose E. Roman   PetscFunctionReturn(0);
812d24d4204SJose E. Roman }
813d24d4204SJose E. Roman 
814d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
815d24d4204SJose E. Roman {
816d24d4204SJose E. Roman   PetscFunctionBegin;
817d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
818d24d4204SJose E. Roman   PetscFunctionReturn(0);
819d24d4204SJose E. Roman }
820d24d4204SJose E. Roman 
821d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
822d24d4204SJose E. Roman {
823d24d4204SJose E. Roman   PetscFunctionBegin;
824d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
825d24d4204SJose E. Roman   PetscFunctionReturn(0);
826d24d4204SJose E. Roman }
827d24d4204SJose E. Roman 
828d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
829d24d4204SJose E. Roman {
830d24d4204SJose E. Roman   Mat            B;
83159172f18SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
832d24d4204SJose E. Roman 
833d24d4204SJose E. Roman   PetscFunctionBegin;
834d24d4204SJose E. Roman   /* Create the factorization matrix */
8359566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
83666e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
837d24d4204SJose E. Roman   B->factortype = ftype;
8389566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
8399566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));
840d24d4204SJose E. Roman 
8419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
842d24d4204SJose E. Roman   *F = B;
843d24d4204SJose E. Roman   PetscFunctionReturn(0);
844d24d4204SJose E. Roman }
845d24d4204SJose E. Roman 
846d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
847d24d4204SJose E. Roman {
848d24d4204SJose E. Roman   PetscFunctionBegin;
8499566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
8509566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
851d24d4204SJose E. Roman   PetscFunctionReturn(0);
852d24d4204SJose E. Roman }
853d24d4204SJose E. Roman 
854d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
855d24d4204SJose E. Roman {
856d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
857d24d4204SJose E. Roman   PetscBLASInt   one=1,lwork=0;
858d24d4204SJose E. Roman   const char     *ntype;
859d68f4f38SPierre Jolivet   PetscScalar    *work=NULL,dummy;
860d24d4204SJose E. Roman 
861d24d4204SJose E. Roman   PetscFunctionBegin;
862d24d4204SJose E. Roman   switch (type) {
863d24d4204SJose E. Roman     case NORM_1:
864d24d4204SJose E. Roman       ntype = "1";
865d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
866d24d4204SJose E. Roman       break;
867d24d4204SJose E. Roman     case NORM_FROBENIUS:
868d24d4204SJose E. Roman       ntype = "F";
869d24d4204SJose E. Roman       work  = &dummy;
870d24d4204SJose E. Roman       break;
871d24d4204SJose E. Roman     case NORM_INFINITY:
872d24d4204SJose E. Roman       ntype = "I";
873d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
874d24d4204SJose E. Roman       break;
875d24d4204SJose E. Roman     default:
876d24d4204SJose E. Roman       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
877d24d4204SJose E. Roman   }
8789566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscMalloc1(lwork,&work));
879d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
8809566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscFree(work));
881d24d4204SJose E. Roman   PetscFunctionReturn(0);
882d24d4204SJose E. Roman }
883d24d4204SJose E. Roman 
884d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
885d24d4204SJose E. Roman {
886d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
887d24d4204SJose E. Roman 
888d24d4204SJose E. Roman   PetscFunctionBegin;
8899566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(a->loc,a->lld*a->locc));
890d24d4204SJose E. Roman   PetscFunctionReturn(0);
891d24d4204SJose E. Roman }
892d24d4204SJose E. Roman 
893d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
894d24d4204SJose E. Roman {
895d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
896d24d4204SJose E. Roman   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
897d24d4204SJose E. Roman 
898d24d4204SJose E. Roman   PetscFunctionBegin;
899d24d4204SJose E. Roman   if (rows) {
900d24d4204SJose E. Roman     n     = a->locr;
901d24d4204SJose E. Roman     nb    = a->mb;
902d24d4204SJose E. Roman     isrc  = a->rsrc;
903d24d4204SJose E. Roman     nproc = a->grid->nprow;
904d24d4204SJose E. Roman     iproc = a->grid->myrow;
9059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n,&idx));
906d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
9079566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
908d24d4204SJose E. Roman   }
909d24d4204SJose E. Roman   if (cols) {
910d24d4204SJose E. Roman     n     = a->locc;
911d24d4204SJose E. Roman     nb    = a->nb;
912d24d4204SJose E. Roman     isrc  = a->csrc;
913d24d4204SJose E. Roman     nproc = a->grid->npcol;
914d24d4204SJose E. Roman     iproc = a->grid->mycol;
9159566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n,&idx));
916d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
9179566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
918d24d4204SJose E. Roman   }
919d24d4204SJose E. Roman   PetscFunctionReturn(0);
920d24d4204SJose E. Roman }
921d24d4204SJose E. Roman 
922d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
923d24d4204SJose E. Roman {
924d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
925d24d4204SJose E. Roman   Mat            Bmpi;
926d24d4204SJose E. Roman   MPI_Comm       comm;
9274b1a79daSJose E. Roman   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
9284b1a79daSJose E. Roman   const PetscInt *ranges,*branges,*cwork;
9294b1a79daSJose E. Roman   const PetscScalar *vwork;
930d24d4204SJose E. Roman   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
931d24d4204SJose E. Roman   PetscScalar    *barray;
9324b1a79daSJose E. Roman   PetscBool      differ=PETSC_FALSE;
9334b1a79daSJose E. Roman   PetscMPIInt    size;
934d24d4204SJose E. Roman 
935d24d4204SJose E. Roman   PetscFunctionBegin;
9369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
9379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
9384b1a79daSJose E. Roman 
9394b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9409566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm,&size));
9419566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges));
9424b1a79daSJose E. Roman     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
9434b1a79daSJose E. Roman   }
9444b1a79daSJose E. Roman 
9454b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
9469566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm,&Bmpi));
9474b1a79daSJose E. Roman     m = PETSC_DECIDE;
9489566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
9494b1a79daSJose E. Roman     n = PETSC_DECIDE;
9509566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
9519566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi,m,n,M,N));
9529566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi,MATDENSE));
9539566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
9544b1a79daSJose E. Roman 
9554b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9569566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
9574b1a79daSJose E. Roman     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
958792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
9594b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
9604b1a79daSJose E. Roman 
9614b1a79daSJose E. Roman     /* redistribute matrix */
9629566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi,&barray));
963792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
9649566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi,&barray));
9659566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
9669566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
9674b1a79daSJose E. Roman 
9684b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
9699566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend));
9704b1a79daSJose E. Roman     for (i=rstart;i<rend;i++) {
9719566063dSJacob Faibussowitsch       PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
9729566063dSJacob Faibussowitsch       PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
9739566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
9744b1a79daSJose E. Roman     }
9759566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
9769566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
9779566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Bmpi));
9784b1a79daSJose E. Roman 
9794b1a79daSJose E. Roman   } else {  /* normal cases */
980d24d4204SJose E. Roman 
981d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
982d24d4204SJose E. Roman     else {
9839566063dSJacob Faibussowitsch       PetscCall(MatCreate(comm,&Bmpi));
98492c846b4SJose E. Roman       m = PETSC_DECIDE;
9859566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
98692c846b4SJose E. Roman       n = PETSC_DECIDE;
9879566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
9889566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(Bmpi,m,n,M,N));
9899566063dSJacob Faibussowitsch       PetscCall(MatSetType(Bmpi,MATDENSE));
9909566063dSJacob Faibussowitsch       PetscCall(MatSetUp(Bmpi));
991d24d4204SJose E. Roman     }
992d24d4204SJose E. Roman 
993d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9949566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
995d24d4204SJose E. Roman     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
996792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
997d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
998d24d4204SJose E. Roman 
999d24d4204SJose E. Roman     /* redistribute matrix */
10009566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi,&barray));
1001792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
10029566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi,&barray));
1003d24d4204SJose E. Roman 
10049566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
10059566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
1006d24d4204SJose E. Roman     if (reuse == MAT_INPLACE_MATRIX) {
10079566063dSJacob Faibussowitsch       PetscCall(MatHeaderReplace(A,&Bmpi));
1008d24d4204SJose E. Roman     } else *B = Bmpi;
10094b1a79daSJose E. Roman   }
1010d24d4204SJose E. Roman   PetscFunctionReturn(0);
1011d24d4204SJose E. Roman }
1012d24d4204SJose E. Roman 
1013b12397e7SPierre Jolivet static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct)
1014b12397e7SPierre Jolivet {
1015b12397e7SPierre Jolivet   const PetscInt *ranges;
1016b12397e7SPierre Jolivet   PetscMPIInt    size;
1017b12397e7SPierre Jolivet   PetscInt       i,n;
1018b12397e7SPierre Jolivet 
1019b12397e7SPierre Jolivet   PetscFunctionBegin;
1020b12397e7SPierre Jolivet   *correct = PETSC_TRUE;
1021b12397e7SPierre Jolivet   PetscCallMPI(MPI_Comm_size(map->comm,&size));
1022b12397e7SPierre Jolivet   if (size>1) {
1023b12397e7SPierre Jolivet     PetscCall(PetscLayoutGetRanges(map,&ranges));
1024b12397e7SPierre Jolivet     n = ranges[1]-ranges[0];
1025b12397e7SPierre Jolivet     for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break;
1026b12397e7SPierre Jolivet     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n));
1027b12397e7SPierre Jolivet   }
1028b12397e7SPierre Jolivet   PetscFunctionReturn(0);
1029b12397e7SPierre Jolivet }
1030b12397e7SPierre Jolivet 
1031d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1032d24d4204SJose E. Roman {
1033d24d4204SJose E. Roman   Mat_ScaLAPACK  *b;
1034d24d4204SJose E. Roman   Mat            Bmpi;
1035d24d4204SJose E. Roman   MPI_Comm       comm;
103692c846b4SJose E. Roman   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1037b12397e7SPierre Jolivet   const PetscInt *ranges,*rows,*cols;
1038d24d4204SJose E. Roman   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1039d24d4204SJose E. Roman   PetscScalar    *aarray;
1040b12397e7SPierre Jolivet   IS             ir,ic;
10414e8b52a1SJose E. Roman   PetscInt       lda;
1042b12397e7SPierre Jolivet   PetscBool      flg;
1043d24d4204SJose E. Roman 
1044d24d4204SJose E. Roman   PetscFunctionBegin;
10459566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1046d24d4204SJose E. Roman 
1047d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1048d24d4204SJose E. Roman   else {
10499566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm,&Bmpi));
105092c846b4SJose E. Roman     m = PETSC_DECIDE;
10519566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
105292c846b4SJose E. Roman     n = PETSC_DECIDE;
10539566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
10549566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi,m,n,M,N));
10559566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi,MATSCALAPACK));
10569566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
1057d24d4204SJose E. Roman   }
1058d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bmpi->data;
1059d24d4204SJose E. Roman 
1060b12397e7SPierre Jolivet   PetscCall(MatDenseGetLDA(A,&lda));
1061b12397e7SPierre Jolivet   PetscCall(MatDenseGetArray(A,&aarray));
1062b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
1063b12397e7SPierre Jolivet   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
1064b12397e7SPierre Jolivet   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1065d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for A (1d block distribution) */
10669566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
10679566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
10684e8b52a1SJose E. Roman     lld = PetscMax(lda,1);  /* local leading dimension */
1069792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1070d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
1071d24d4204SJose E. Roman 
1072d24d4204SJose E. Roman     /* redistribute matrix */
1073792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1074b12397e7SPierre Jolivet     Bmpi->nooffprocentries = PETSC_TRUE;
1075b12397e7SPierre Jolivet   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1076b12397e7SPierre Jolivet     PetscCheck(lda == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")",lda,A->rmap->n);
1077b12397e7SPierre Jolivet     b->roworiented = PETSC_FALSE;
1078b12397e7SPierre Jolivet     PetscCall(MatGetOwnershipIS(A,&ir,&ic));
1079b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ir,&rows));
1080b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ic,&cols));
1081b12397e7SPierre Jolivet     PetscCall(MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES));
1082b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ir,&rows));
1083b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ic,&cols));
1084b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ic));
1085b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ir));
1086b12397e7SPierre Jolivet   }
10879566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A,&aarray));
10889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
10899566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
1090d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
10919566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A,&Bmpi));
1092d24d4204SJose E. Roman   } else *B = Bmpi;
1093d24d4204SJose E. Roman   PetscFunctionReturn(0);
1094d24d4204SJose E. Roman }
1095d24d4204SJose E. Roman 
1096d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1097d24d4204SJose E. Roman {
1098d24d4204SJose E. Roman   Mat               mat_scal;
1099d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1100d24d4204SJose E. Roman   const PetscInt    *cols;
1101d24d4204SJose E. Roman   const PetscScalar *vals;
1102d24d4204SJose E. Roman 
1103d24d4204SJose E. Roman   PetscFunctionBegin;
1104d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1105d24d4204SJose E. Roman     mat_scal = *newmat;
11069566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1107d24d4204SJose E. Roman   } else {
11089566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1109d24d4204SJose E. Roman     m = PETSC_DECIDE;
11109566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1111d24d4204SJose E. Roman     n = PETSC_DECIDE;
11129566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
11139566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal,m,n,M,N));
11149566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal,MATSCALAPACK));
11159566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1116d24d4204SJose E. Roman   }
1117d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
11189566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
11199566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
11209566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1121d24d4204SJose E. Roman   }
11229566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
11239566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1124d24d4204SJose E. Roman 
11259566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
1126d24d4204SJose E. Roman   else *newmat = mat_scal;
1127d24d4204SJose E. Roman   PetscFunctionReturn(0);
1128d24d4204SJose E. Roman }
1129d24d4204SJose E. Roman 
1130d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1131d24d4204SJose E. Roman {
1132d24d4204SJose E. Roman   Mat               mat_scal;
1133d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1134d24d4204SJose E. Roman   const PetscInt    *cols;
1135d24d4204SJose E. Roman   const PetscScalar *vals;
1136d24d4204SJose E. Roman   PetscScalar       v;
1137d24d4204SJose E. Roman 
1138d24d4204SJose E. Roman   PetscFunctionBegin;
1139d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1140d24d4204SJose E. Roman     mat_scal = *newmat;
11419566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1142d24d4204SJose E. Roman   } else {
11439566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1144d24d4204SJose E. Roman     m = PETSC_DECIDE;
11459566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1146d24d4204SJose E. Roman     n = PETSC_DECIDE;
11479566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
11489566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal,m,n,M,N));
11499566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal,MATSCALAPACK));
11509566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1151d24d4204SJose E. Roman   }
11529566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
1153d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
11549566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
11559566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
1156d24d4204SJose E. Roman     for (j=0;j<ncols;j++) { /* lower triangular part */
1157d24d4204SJose E. Roman       if (cols[j] == row) continue;
1158b94d7dedSBarry Smith       v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
11599566063dSJacob Faibussowitsch       PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
1160d24d4204SJose E. Roman     }
11619566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1162d24d4204SJose E. Roman   }
11639566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
11649566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
11659566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1166d24d4204SJose E. Roman 
11679566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
1168d24d4204SJose E. Roman   else *newmat = mat_scal;
1169d24d4204SJose E. Roman   PetscFunctionReturn(0);
1170d24d4204SJose E. Roman }
1171d24d4204SJose E. Roman 
1172d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1173d24d4204SJose E. Roman {
1174d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1175d24d4204SJose E. Roman   PetscInt       sz=0;
1176d24d4204SJose E. Roman 
1177d24d4204SJose E. Roman   PetscFunctionBegin;
11789566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
11799566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1180d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1181d24d4204SJose E. Roman 
11829566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
11839566063dSJacob Faibussowitsch   PetscCall(PetscIntMultError(a->lld,a->locc,&sz));
11849566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(sz,&a->loc));
11859566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));
1186d24d4204SJose E. Roman 
1187d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
1188d24d4204SJose E. Roman   PetscFunctionReturn(0);
1189d24d4204SJose E. Roman }
1190d24d4204SJose E. Roman 
1191d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1192d24d4204SJose E. Roman {
1193d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1194d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1195d24d4204SJose E. Roman   PetscBool          flg;
1196d24d4204SJose E. Roman   MPI_Comm           icomm;
1197d24d4204SJose E. Roman 
1198d24d4204SJose E. Roman   PetscFunctionBegin;
11999566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&A->stash));
12009566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
12019566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->pivots));
12029566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
12039566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1204d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1205d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1206d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1207d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
12089566063dSJacob Faibussowitsch     PetscCall(PetscFree(grid));
12099566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
1210d24d4204SJose E. Roman   }
12119566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
12129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
12139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
12149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
12159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
12169566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1217d24d4204SJose E. Roman   PetscFunctionReturn(0);
1218d24d4204SJose E. Roman }
1219d24d4204SJose E. Roman 
1220d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1221d24d4204SJose E. Roman {
1222d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1223d24d4204SJose E. Roman   PetscBLASInt   info=0;
1224b12397e7SPierre Jolivet   PetscBool      flg;
1225d24d4204SJose E. Roman 
1226d24d4204SJose E. Roman   PetscFunctionBegin;
12279566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12289566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1229d24d4204SJose E. Roman 
1230b12397e7SPierre Jolivet   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1231b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
1232b12397e7SPierre Jolivet   PetscCheck(flg,A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1233b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
1234b12397e7SPierre Jolivet   PetscCheck(flg,A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1235d24d4204SJose E. Roman 
1236d24d4204SJose E. Roman   /* compute local sizes */
12379566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->rmap->N,&a->M));
12389566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->cmap->N,&a->N));
1239d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1240d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1241d24d4204SJose E. Roman   a->lld  = PetscMax(1,a->locr);
1242d24d4204SJose E. Roman 
1243d24d4204SJose E. Roman   /* allocate local array */
12449566063dSJacob Faibussowitsch   PetscCall(MatScaLAPACKSetPreallocation(A));
1245d24d4204SJose E. Roman 
1246d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1247792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1248d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1249d24d4204SJose E. Roman   PetscFunctionReturn(0);
1250d24d4204SJose E. Roman }
1251d24d4204SJose E. Roman 
1252d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1253d24d4204SJose E. Roman {
1254d24d4204SJose E. Roman   PetscInt       nstash,reallocs;
1255d24d4204SJose E. Roman 
1256d24d4204SJose E. Roman   PetscFunctionBegin;
1257d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
12589566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL));
12599566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
12609566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
1261d24d4204SJose E. Roman   PetscFunctionReturn(0);
1262d24d4204SJose E. Roman }
1263d24d4204SJose E. Roman 
1264d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1265d24d4204SJose E. Roman {
1266d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1267d24d4204SJose E. Roman   PetscMPIInt    n;
1268d24d4204SJose E. Roman   PetscInt       i,flg,*row,*col;
1269d24d4204SJose E. Roman   PetscScalar    *val;
1270d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1271d24d4204SJose E. Roman 
1272d24d4204SJose E. Roman   PetscFunctionBegin;
1273d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1274d24d4204SJose E. Roman   while (1) {
12759566063dSJacob Faibussowitsch     PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
1276d24d4204SJose E. Roman     if (!flg) break;
1277d24d4204SJose E. Roman     for (i=0;i<n;i++) {
12789566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(row[i]+1,&gridx));
12799566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(col[i]+1,&gcidx));
1280792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1281aed4548fSBarry Smith       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
1282d24d4204SJose E. Roman       switch (A->insertmode) {
1283d24d4204SJose E. Roman         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1284d24d4204SJose E. Roman         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
128598921bdaSJacob Faibussowitsch         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1286d24d4204SJose E. Roman       }
1287d24d4204SJose E. Roman     }
1288d24d4204SJose E. Roman   }
12899566063dSJacob Faibussowitsch   PetscCall(MatStashScatterEnd_Private(&A->stash));
1290d24d4204SJose E. Roman   PetscFunctionReturn(0);
1291d24d4204SJose E. Roman }
1292d24d4204SJose E. Roman 
1293d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1294d24d4204SJose E. Roman {
1295d24d4204SJose E. Roman   Mat            Adense,As;
1296d24d4204SJose E. Roman   MPI_Comm       comm;
1297d24d4204SJose E. Roman 
1298d24d4204SJose E. Roman   PetscFunctionBegin;
12999566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
13009566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,&Adense));
13019566063dSJacob Faibussowitsch   PetscCall(MatSetType(Adense,MATDENSE));
13029566063dSJacob Faibussowitsch   PetscCall(MatLoad(Adense,viewer));
13039566063dSJacob Faibussowitsch   PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
13049566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
13059566063dSJacob Faibussowitsch   PetscCall(MatHeaderReplace(newMat,&As));
1306d24d4204SJose E. Roman   PetscFunctionReturn(0);
1307d24d4204SJose E. Roman }
1308d24d4204SJose E. Roman 
1309d24d4204SJose E. Roman /* -------------------------------------------------------------------*/
1310d24d4204SJose E. Roman static struct _MatOps MatOps_Values = {
1311d24d4204SJose E. Roman        MatSetValues_ScaLAPACK,
1312d24d4204SJose E. Roman        0,
1313d24d4204SJose E. Roman        0,
1314d24d4204SJose E. Roman        MatMult_ScaLAPACK,
1315d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK,
1316d24d4204SJose E. Roman        MatMultTranspose_ScaLAPACK,
1317d24d4204SJose E. Roman        MatMultTransposeAdd_ScaLAPACK,
1318d24d4204SJose E. Roman        MatSolve_ScaLAPACK,
1319d24d4204SJose E. Roman        MatSolveAdd_ScaLAPACK,
1320d24d4204SJose E. Roman        0,
1321d24d4204SJose E. Roman /*10*/ 0,
1322d24d4204SJose E. Roman        MatLUFactor_ScaLAPACK,
1323d24d4204SJose E. Roman        MatCholeskyFactor_ScaLAPACK,
1324d24d4204SJose E. Roman        0,
1325d24d4204SJose E. Roman        MatTranspose_ScaLAPACK,
1326d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK,
1327d24d4204SJose E. Roman        0,
1328d24d4204SJose E. Roman        MatGetDiagonal_ScaLAPACK,
1329d24d4204SJose E. Roman        MatDiagonalScale_ScaLAPACK,
1330d24d4204SJose E. Roman        MatNorm_ScaLAPACK,
1331d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK,
1332d24d4204SJose E. Roman        MatAssemblyEnd_ScaLAPACK,
1333d24d4204SJose E. Roman        MatSetOption_ScaLAPACK,
1334d24d4204SJose E. Roman        MatZeroEntries_ScaLAPACK,
1335d24d4204SJose E. Roman /*24*/ 0,
1336d24d4204SJose E. Roman        MatLUFactorSymbolic_ScaLAPACK,
1337d24d4204SJose E. Roman        MatLUFactorNumeric_ScaLAPACK,
1338d24d4204SJose E. Roman        MatCholeskyFactorSymbolic_ScaLAPACK,
1339d24d4204SJose E. Roman        MatCholeskyFactorNumeric_ScaLAPACK,
1340d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK,
1341d24d4204SJose E. Roman        0,
1342d24d4204SJose E. Roman        0,
1343d24d4204SJose E. Roman        0,
1344d24d4204SJose E. Roman        0,
1345d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK,
1346d24d4204SJose E. Roman        0,
1347d24d4204SJose E. Roman        0,
1348d24d4204SJose E. Roman        0,
1349d24d4204SJose E. Roman        0,
1350d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK,
1351d24d4204SJose E. Roman        0,
1352d24d4204SJose E. Roman        0,
1353d24d4204SJose E. Roman        0,
1354d24d4204SJose E. Roman        MatCopy_ScaLAPACK,
1355d24d4204SJose E. Roman /*44*/ 0,
1356d24d4204SJose E. Roman        MatScale_ScaLAPACK,
1357d24d4204SJose E. Roman        MatShift_ScaLAPACK,
1358d24d4204SJose E. Roman        0,
1359d24d4204SJose E. Roman        0,
1360d24d4204SJose E. Roman /*49*/ 0,
1361d24d4204SJose E. Roman        0,
1362d24d4204SJose E. Roman        0,
1363d24d4204SJose E. Roman        0,
1364d24d4204SJose E. Roman        0,
1365d24d4204SJose E. Roman /*54*/ 0,
1366d24d4204SJose E. Roman        0,
1367d24d4204SJose E. Roman        0,
1368d24d4204SJose E. Roman        0,
1369d24d4204SJose E. Roman        0,
1370d24d4204SJose E. Roman /*59*/ 0,
1371d24d4204SJose E. Roman        MatDestroy_ScaLAPACK,
1372d24d4204SJose E. Roman        MatView_ScaLAPACK,
1373d24d4204SJose E. Roman        0,
1374d24d4204SJose E. Roman        0,
1375d24d4204SJose E. Roman /*64*/ 0,
1376d24d4204SJose E. Roman        0,
1377d24d4204SJose E. Roman        0,
1378d24d4204SJose E. Roman        0,
1379d24d4204SJose E. Roman        0,
1380d24d4204SJose E. Roman /*69*/ 0,
1381d24d4204SJose E. Roman        0,
1382d24d4204SJose E. Roman        MatConvert_ScaLAPACK_Dense,
1383d24d4204SJose E. Roman        0,
1384d24d4204SJose E. Roman        0,
1385d24d4204SJose E. Roman /*74*/ 0,
1386d24d4204SJose E. Roman        0,
1387d24d4204SJose E. Roman        0,
1388d24d4204SJose E. Roman        0,
1389d24d4204SJose E. Roman        0,
1390d24d4204SJose E. Roman /*79*/ 0,
1391d24d4204SJose E. Roman        0,
1392d24d4204SJose E. Roman        0,
1393d24d4204SJose E. Roman        0,
1394d24d4204SJose E. Roman        MatLoad_ScaLAPACK,
1395d24d4204SJose E. Roman /*84*/ 0,
1396d24d4204SJose E. Roman        0,
1397d24d4204SJose E. Roman        0,
1398d24d4204SJose E. Roman        0,
1399d24d4204SJose E. Roman        0,
1400d24d4204SJose E. Roman /*89*/ 0,
1401d24d4204SJose E. Roman        0,
1402d24d4204SJose E. Roman        MatMatMultNumeric_ScaLAPACK,
1403d24d4204SJose E. Roman        0,
1404d24d4204SJose E. Roman        0,
1405d24d4204SJose E. Roman /*94*/ 0,
1406d24d4204SJose E. Roman        0,
1407d24d4204SJose E. Roman        0,
1408d24d4204SJose E. Roman        MatMatTransposeMultNumeric_ScaLAPACK,
1409d24d4204SJose E. Roman        0,
1410d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK,
1411d24d4204SJose E. Roman        0,
1412d24d4204SJose E. Roman        0,
1413d24d4204SJose E. Roman        MatConjugate_ScaLAPACK,
1414d24d4204SJose E. Roman        0,
1415d24d4204SJose E. Roman /*104*/0,
1416d24d4204SJose E. Roman        0,
1417d24d4204SJose E. Roman        0,
1418d24d4204SJose E. Roman        0,
1419d24d4204SJose E. Roman        0,
1420d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK,
1421d24d4204SJose E. Roman        0,
1422d24d4204SJose E. Roman        0,
1423d24d4204SJose E. Roman        0,
1424d24d4204SJose E. Roman        MatMissingDiagonal_ScaLAPACK,
1425d24d4204SJose E. Roman /*114*/0,
1426d24d4204SJose E. Roman        0,
1427d24d4204SJose E. Roman        0,
1428d24d4204SJose E. Roman        0,
1429d24d4204SJose E. Roman        0,
1430d24d4204SJose E. Roman /*119*/0,
1431d24d4204SJose E. Roman        MatHermitianTranspose_ScaLAPACK,
1432d24d4204SJose E. Roman        0,
1433d24d4204SJose E. Roman        0,
1434d24d4204SJose E. Roman        0,
1435d24d4204SJose E. Roman /*124*/0,
1436d24d4204SJose E. Roman        0,
1437d24d4204SJose E. Roman        0,
1438d24d4204SJose E. Roman        0,
1439d24d4204SJose E. Roman        0,
1440d24d4204SJose E. Roman /*129*/0,
1441d24d4204SJose E. Roman        0,
1442d24d4204SJose E. Roman        0,
1443d24d4204SJose E. Roman        0,
1444d24d4204SJose E. Roman        0,
1445d24d4204SJose E. Roman /*134*/0,
1446d24d4204SJose E. Roman        0,
1447d24d4204SJose E. Roman        0,
1448d24d4204SJose E. Roman        0,
1449d24d4204SJose E. Roman        0,
1450d24d4204SJose E. Roman        0,
1451d24d4204SJose E. Roman /*140*/0,
1452d24d4204SJose E. Roman        0,
1453d24d4204SJose E. Roman        0,
1454d24d4204SJose E. Roman        0,
1455d24d4204SJose E. Roman        0,
1456d24d4204SJose E. Roman /*145*/0,
1457d24d4204SJose E. Roman        0,
145899a7f59eSMark Adams        0,
145999a7f59eSMark Adams        0,
14607fb60732SBarry Smith        0,
14617fb60732SBarry Smith /*150*/0
1462d24d4204SJose E. Roman };
1463d24d4204SJose E. Roman 
1464d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1465d24d4204SJose E. Roman {
1466d24d4204SJose E. Roman   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1467d24d4204SJose E. Roman   PetscInt           size=stash->size,nsends;
1468d24d4204SJose E. Roman   PetscInt           count,*sindices,**rindices,i,j,l;
1469d24d4204SJose E. Roman   PetscScalar        **rvalues,*svalues;
1470d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1471d24d4204SJose E. Roman   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1472d24d4204SJose E. Roman   PetscMPIInt        *sizes,*nlengths,nreceives;
1473d24d4204SJose E. Roman   PetscInt           *sp_idx,*sp_idy;
1474d24d4204SJose E. Roman   PetscScalar        *sp_val;
1475d24d4204SJose E. Roman   PetscMatStashSpace space,space_next;
1476d24d4204SJose E. Roman   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1477d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1478d24d4204SJose E. Roman 
1479d24d4204SJose E. Roman   PetscFunctionBegin;
1480d24d4204SJose E. Roman   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1481d24d4204SJose E. Roman     InsertMode addv;
14821c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
148308401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1484d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1485d24d4204SJose E. Roman   }
1486d24d4204SJose E. Roman 
1487d24d4204SJose E. Roman   bs2 = stash->bs*stash->bs;
1488d24d4204SJose E. Roman 
1489d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
14909566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size,&nlengths));
14919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n+1,&owner));
1492d24d4204SJose E. Roman 
1493d24d4204SJose E. Roman   i     = j    = 0;
1494d24d4204SJose E. Roman   space = stash->space_head;
1495d24d4204SJose E. Roman   while (space) {
1496d24d4204SJose E. Roman     space_next = space->next;
1497d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
14989566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx));
14999566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx));
1500792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1501d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1502d24d4204SJose E. Roman       nlengths[j]++; owner[i] = j;
1503d24d4204SJose E. Roman       i++;
1504d24d4204SJose E. Roman     }
1505d24d4204SJose E. Roman     space = space_next;
1506d24d4204SJose E. Roman   }
1507d24d4204SJose E. Roman 
1508d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
15099566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size,&sizes));
1510d24d4204SJose E. Roman   for (i=0, nsends=0; i<size; i++) {
1511d24d4204SJose E. Roman     if (nlengths[i]) {
1512d24d4204SJose E. Roman       sizes[i] = 1; nsends++;
1513d24d4204SJose E. Roman     }
1514d24d4204SJose E. Roman   }
1515d24d4204SJose E. Roman 
1516d24d4204SJose E. Roman   {PetscMPIInt *onodes,*olengths;
1517d24d4204SJose E. Roman    /* Determine the number of messages to expect, their lengths, from from-ids */
15189566063dSJacob Faibussowitsch    PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
15199566063dSJacob Faibussowitsch    PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
1520d24d4204SJose E. Roman    /* since clubbing row,col - lengths are multiplied by 2 */
1521d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] *=2;
15229566063dSJacob Faibussowitsch    PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
1523d24d4204SJose E. Roman    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1524d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
15259566063dSJacob Faibussowitsch    PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
15269566063dSJacob Faibussowitsch    PetscCall(PetscFree(onodes));
15279566063dSJacob Faibussowitsch    PetscCall(PetscFree(olengths));}
1528d24d4204SJose E. Roman 
1529d24d4204SJose E. Roman   /* do sends:
1530d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1531d24d4204SJose E. Roman          the ith processor
1532d24d4204SJose E. Roman   */
15339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
15349566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2*nsends,&send_waits));
15359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size,&startv,size,&starti));
1536d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
1537d24d4204SJose E. Roman   startv[0] = 0; starti[0] = 0;
1538d24d4204SJose E. Roman   for (i=1; i<size; i++) {
1539d24d4204SJose E. Roman     startv[i] = startv[i-1] + nlengths[i-1];
1540d24d4204SJose E. Roman     starti[i] = starti[i-1] + 2*nlengths[i-1];
1541d24d4204SJose E. Roman   }
1542d24d4204SJose E. Roman 
1543d24d4204SJose E. Roman   i     = 0;
1544d24d4204SJose E. Roman   space = stash->space_head;
1545d24d4204SJose E. Roman   while (space) {
1546d24d4204SJose E. Roman     space_next = space->next;
1547d24d4204SJose E. Roman     sp_idx     = space->idx;
1548d24d4204SJose E. Roman     sp_idy     = space->idy;
1549d24d4204SJose E. Roman     sp_val     = space->val;
1550d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
1551d24d4204SJose E. Roman       j = owner[i];
1552d24d4204SJose E. Roman       if (bs2 == 1) {
1553d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1554d24d4204SJose E. Roman       } else {
1555d24d4204SJose E. Roman         PetscInt    k;
1556d24d4204SJose E. Roman         PetscScalar *buf1,*buf2;
1557d24d4204SJose E. Roman         buf1 = svalues+bs2*startv[j];
1558d24d4204SJose E. Roman         buf2 = space->val + bs2*l;
1559d24d4204SJose E. Roman         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1560d24d4204SJose E. Roman       }
1561d24d4204SJose E. Roman       sindices[starti[j]]             = sp_idx[l];
1562d24d4204SJose E. Roman       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1563d24d4204SJose E. Roman       startv[j]++;
1564d24d4204SJose E. Roman       starti[j]++;
1565d24d4204SJose E. Roman       i++;
1566d24d4204SJose E. Roman     }
1567d24d4204SJose E. Roman     space = space_next;
1568d24d4204SJose E. Roman   }
1569d24d4204SJose E. Roman   startv[0] = 0;
1570d24d4204SJose E. Roman   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1571d24d4204SJose E. Roman 
1572d24d4204SJose E. Roman   for (i=0,count=0; i<size; i++) {
1573d24d4204SJose E. Roman     if (sizes[i]) {
15749566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
15759566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
1576d24d4204SJose E. Roman     }
1577d24d4204SJose E. Roman   }
1578d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
15799566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
1580d24d4204SJose E. Roman   for (i=0; i<size; i++) {
1581d24d4204SJose E. Roman     if (sizes[i]) {
15829566063dSJacob Faibussowitsch       PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
1583d24d4204SJose E. Roman     }
1584d24d4204SJose E. Roman   }
1585d24d4204SJose E. Roman #endif
15869566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
15879566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
15889566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv,starti));
15899566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
1590d24d4204SJose E. Roman 
1591d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
15929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2*nreceives,&recv_waits));
1593d24d4204SJose E. Roman 
1594d24d4204SJose E. Roman   for (i=0; i<nreceives; i++) {
1595d24d4204SJose E. Roman     recv_waits[2*i]   = recv_waits1[i];
1596d24d4204SJose E. Roman     recv_waits[2*i+1] = recv_waits2[i];
1597d24d4204SJose E. Roman   }
1598d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1599d24d4204SJose E. Roman 
16009566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
16019566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
1602d24d4204SJose E. Roman 
1603d24d4204SJose E. Roman   stash->svalues         = svalues;
1604d24d4204SJose E. Roman   stash->sindices        = sindices;
1605d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1606d24d4204SJose E. Roman   stash->rindices        = rindices;
1607d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1608d24d4204SJose E. Roman   stash->nsends          = nsends;
1609d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1610d24d4204SJose E. Roman   stash->reproduce_count = 0;
1611d24d4204SJose E. Roman   PetscFunctionReturn(0);
1612d24d4204SJose E. Roman }
1613d24d4204SJose E. Roman 
1614d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615d24d4204SJose E. Roman {
1616d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1617d24d4204SJose E. Roman 
1618d24d4204SJose E. Roman   PetscFunctionBegin;
161928b400f6SJacob Faibussowitsch   PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1620aed4548fSBarry Smith   PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
1621aed4548fSBarry Smith   PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
16229566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
16239566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1624d24d4204SJose E. Roman   PetscFunctionReturn(0);
1625d24d4204SJose E. Roman }
1626d24d4204SJose E. Roman 
1627d24d4204SJose E. Roman /*@
16286aad120cSJose E. Roman    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1629d24d4204SJose E. Roman    the ScaLAPACK matrix
1630d24d4204SJose E. Roman 
1631d24d4204SJose E. Roman    Logically Collective on A
1632d24d4204SJose E. Roman 
1633d8d19677SJose E. Roman    Input Parameters:
1634d24d4204SJose E. Roman +  A  - a MATSCALAPACK matrix
1635d24d4204SJose E. Roman .  mb - the row block size
1636d24d4204SJose E. Roman -  nb - the column block size
1637d24d4204SJose E. Roman 
1638d24d4204SJose E. Roman    Level: intermediate
1639d24d4204SJose E. Roman 
1640db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1641d24d4204SJose E. Roman @*/
1642d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1643d24d4204SJose E. Roman {
1644d24d4204SJose E. Roman   PetscFunctionBegin;
1645d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1646d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,mb,2);
1647d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,nb,3);
1648cac4c232SBarry Smith   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1649d24d4204SJose E. Roman   PetscFunctionReturn(0);
1650d24d4204SJose E. Roman }
1651d24d4204SJose E. Roman 
1652d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1653d24d4204SJose E. Roman {
1654d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1655d24d4204SJose E. Roman 
1656d24d4204SJose E. Roman   PetscFunctionBegin;
1657d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1658d24d4204SJose E. Roman   if (nb) *nb = a->nb;
1659d24d4204SJose E. Roman   PetscFunctionReturn(0);
1660d24d4204SJose E. Roman }
1661d24d4204SJose E. Roman 
1662d24d4204SJose E. Roman /*@
16636aad120cSJose E. Roman    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1664d24d4204SJose E. Roman    the ScaLAPACK matrix
1665d24d4204SJose E. Roman 
1666d24d4204SJose E. Roman    Not collective
1667d24d4204SJose E. Roman 
1668d24d4204SJose E. Roman    Input Parameter:
1669d24d4204SJose E. Roman .  A  - a MATSCALAPACK matrix
1670d24d4204SJose E. Roman 
1671d24d4204SJose E. Roman    Output Parameters:
1672d24d4204SJose E. Roman +  mb - the row block size
1673d24d4204SJose E. Roman -  nb - the column block size
1674d24d4204SJose E. Roman 
1675d24d4204SJose E. Roman    Level: intermediate
1676d24d4204SJose E. Roman 
1677db781477SPatrick Sanan .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1678d24d4204SJose E. Roman @*/
1679d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1680d24d4204SJose E. Roman {
1681d24d4204SJose E. Roman   PetscFunctionBegin;
1682d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1683cac4c232SBarry Smith   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1684d24d4204SJose E. Roman   PetscFunctionReturn(0);
1685d24d4204SJose E. Roman }
1686d24d4204SJose E. Roman 
1687d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1688d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1689d24d4204SJose E. Roman 
1690d24d4204SJose E. Roman /*MC
1691d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1692d24d4204SJose E. Roman 
1693d24d4204SJose E. Roman    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1694d24d4204SJose E. Roman 
1695d24d4204SJose E. Roman    Options Database Keys:
1696d24d4204SJose E. Roman +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
169789bba20eSBarry Smith .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1698d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1699d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1700d24d4204SJose E. Roman 
170189bba20eSBarry Smith   Note:
170289bba20eSBarry Smith    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
170389bba20eSBarry Smith    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
170489bba20eSBarry Smith    the given rank.
170589bba20eSBarry Smith 
1706d24d4204SJose E. Roman    Level: beginner
1707d24d4204SJose E. Roman 
170889bba20eSBarry Smith .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1709d24d4204SJose E. Roman M*/
1710d24d4204SJose E. Roman 
1711d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1712d24d4204SJose E. Roman {
1713d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1714d24d4204SJose E. Roman   PetscBool          flg,flg1;
1715d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1716d24d4204SJose E. Roman   MPI_Comm           icomm;
1717d24d4204SJose E. Roman   PetscBLASInt       nprow,npcol,myrow,mycol;
1718d24d4204SJose E. Roman   PetscInt           optv1,k=2,array[2]={0,0};
1719d24d4204SJose E. Roman   PetscMPIInt        size;
1720d24d4204SJose E. Roman 
1721d24d4204SJose E. Roman   PetscFunctionBegin;
17229566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
1723d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1724d24d4204SJose E. Roman 
17259566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
1726d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1727d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1728d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1729d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1730d24d4204SJose E. Roman 
17319566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A,&a));
1732d24d4204SJose E. Roman   A->data = (void*)a;
1733d24d4204SJose E. Roman 
1734d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1735d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
17369566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
17379566063dSJacob Faibussowitsch     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
17389566063dSJacob Faibussowitsch     PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
1739d24d4204SJose E. Roman   }
17409566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
17419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1742d24d4204SJose E. Roman   if (!flg) {
17439566063dSJacob Faibussowitsch     PetscCall(PetscNewLog(A,&grid));
1744d24d4204SJose E. Roman 
17459566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(icomm,&size));
1746d24d4204SJose E. Roman     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1747d24d4204SJose E. Roman 
1748d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
17499566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
1750d24d4204SJose E. Roman     if (flg1) {
175108401ef6SPierre Jolivet       PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
1752d24d4204SJose E. Roman       grid->nprow = optv1;
1753d24d4204SJose E. Roman     }
1754d0609cedSBarry Smith     PetscOptionsEnd();
1755d24d4204SJose E. Roman 
1756d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1757d24d4204SJose E. Roman     grid->npcol = size/grid->nprow;
17589566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->nprow,&nprow));
17599566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->npcol,&npcol));
1760f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1761d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1762d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1763d24d4204SJose E. Roman     grid->grid_refct = 1;
1764d24d4204SJose E. Roman     grid->nprow      = nprow;
1765d24d4204SJose E. Roman     grid->npcol      = npcol;
1766d24d4204SJose E. Roman     grid->myrow      = myrow;
1767d24d4204SJose E. Roman     grid->mycol      = mycol;
1768d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1769f7ec113fSDamian Marek     grid->ictxrow = Csys2blacs_handle(icomm);
1770d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1771f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1772d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
17739566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));
1774d24d4204SJose E. Roman 
1775d24d4204SJose E. Roman   } else grid->grid_refct++;
17769566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
1777d24d4204SJose E. Roman   a->grid = grid;
1778d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1779d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1780d24d4204SJose E. Roman 
1781d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
17829566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
1783d24d4204SJose E. Roman   if (flg) {
1784d24d4204SJose E. Roman     a->mb = array[0];
1785d24d4204SJose E. Roman     a->nb = (k>1)? array[1]: a->mb;
1786d24d4204SJose E. Roman   }
1787d0609cedSBarry Smith   PetscOptionsEnd();
1788d24d4204SJose E. Roman 
1789b12397e7SPierre Jolivet   a->roworiented = PETSC_TRUE;
17909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
17919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
17929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
17939566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
1794d24d4204SJose E. Roman   PetscFunctionReturn(0);
1795d24d4204SJose E. Roman }
1796d24d4204SJose E. Roman 
1797d24d4204SJose E. Roman /*@C
1798d24d4204SJose E. Roman    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1799d24d4204SJose E. Roman    (2D block cyclic distribution).
1800d24d4204SJose E. Roman 
1801d24d4204SJose E. Roman    Collective
1802d24d4204SJose E. Roman 
1803d24d4204SJose E. Roman    Input Parameters:
1804d24d4204SJose E. Roman +  comm - MPI communicator
1805d24d4204SJose E. Roman .  mb   - row block size (or PETSC_DECIDE to have it set)
1806d24d4204SJose E. Roman .  nb   - column block size (or PETSC_DECIDE to have it set)
1807d24d4204SJose E. Roman .  M    - number of global rows
1808d24d4204SJose E. Roman .  N    - number of global columns
1809d24d4204SJose E. Roman .  rsrc - coordinate of process that owns the first row of the distributed matrix
1810d24d4204SJose E. Roman -  csrc - coordinate of process that owns the first column of the distributed matrix
1811d24d4204SJose E. Roman 
1812d24d4204SJose E. Roman    Output Parameter:
1813d24d4204SJose E. Roman .  A - the matrix
1814d24d4204SJose E. Roman 
1815d24d4204SJose E. Roman    Options Database Keys:
1816d24d4204SJose E. Roman .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1817d24d4204SJose E. Roman 
1818d24d4204SJose E. Roman    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1819d24d4204SJose E. Roman    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1820d24d4204SJose E. Roman    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1821d24d4204SJose E. Roman 
1822d24d4204SJose E. Roman    Notes:
1823d24d4204SJose E. Roman    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1824d24d4204SJose E. Roman    is chosen.
1825d24d4204SJose E. Roman 
1826d24d4204SJose E. Roman    Storage Information:
1827d24d4204SJose E. Roman    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1828d24d4204SJose E. Roman    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1829d24d4204SJose E. Roman    significance and are thus ignored. The block sizes refer to the values
1830d24d4204SJose E. Roman    used for the distributed matrix, not the same meaning as in BAIJ.
1831d24d4204SJose E. Roman 
1832d24d4204SJose E. Roman    Level: intermediate
1833d24d4204SJose E. Roman 
1834db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1835d24d4204SJose E. Roman @*/
1836d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1837d24d4204SJose E. Roman {
1838d24d4204SJose E. Roman   Mat_ScaLAPACK  *a;
1839d24d4204SJose E. Roman   PetscInt       m,n;
1840d24d4204SJose E. Roman 
1841d24d4204SJose E. Roman   PetscFunctionBegin;
18429566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,A));
18439566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A,MATSCALAPACK));
1844aed4548fSBarry Smith   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1845d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1846d24d4204SJose E. Roman   m = PETSC_DECIDE;
18479566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
1848d24d4204SJose E. Roman   n = PETSC_DECIDE;
18499566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
18509566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A,m,n,M,N));
1851d24d4204SJose E. Roman   a = (Mat_ScaLAPACK*)(*A)->data;
18529566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(M,&a->M));
18539566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(N,&a->N));
18549566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
18559566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
18569566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(rsrc,&a->rsrc));
18579566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(csrc,&a->csrc));
18589566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*A));
1859d24d4204SJose E. Roman   PetscFunctionReturn(0);
1860d24d4204SJose E. Roman }
1861