xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet "       TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet "       PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet "       ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet "       YEAR = 1997\n"
1127e75052SPierre Jolivet "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE;
1327e75052SPierre Jolivet 
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman 
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman 
22f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23f7ec113fSDamian Marek {
24f7ec113fSDamian Marek   PetscFunctionBegin;
255f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
265f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27f7ec113fSDamian Marek   PetscFunctionReturn(0);
28f7ec113fSDamian Marek }
29f7ec113fSDamian Marek 
30d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
31d24d4204SJose E. Roman {
32d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
33d24d4204SJose E. Roman   PetscBool         iascii;
34d24d4204SJose E. Roman   PetscViewerFormat format;
35d24d4204SJose E. Roman   Mat               Adense;
36d24d4204SJose E. Roman 
37d24d4204SJose E. Roman   PetscFunctionBegin;
385f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
39d24d4204SJose E. Roman   if (iascii) {
405f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscViewerGetFormat(viewer,&format));
41d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
425f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
435f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
445f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
455f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
46d24d4204SJose E. Roman       PetscFunctionReturn(0);
47d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48d24d4204SJose E. Roman       PetscFunctionReturn(0);
49d24d4204SJose E. Roman     }
50d24d4204SJose E. Roman   }
51d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
525f80ce2aSJacob Faibussowitsch   CHKERRQ(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
535f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(Adense,viewer));
545f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&Adense));
55d24d4204SJose E. Roman   PetscFunctionReturn(0);
56d24d4204SJose E. Roman }
57d24d4204SJose E. Roman 
58d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59d24d4204SJose E. Roman {
60d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61d24d4204SJose E. Roman   PetscLogDouble isend[2],irecv[2];
62d24d4204SJose E. Roman 
63d24d4204SJose E. Roman   PetscFunctionBegin;
64d24d4204SJose E. Roman   info->block_size = 1.0;
65d24d4204SJose E. Roman 
66d24d4204SJose E. Roman   isend[0] = a->lld*a->locc;     /* locally allocated */
67d24d4204SJose E. Roman   isend[1] = a->locr*a->locc;    /* used submatrix */
68d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69d24d4204SJose E. Roman     info->nz_allocated   = isend[0];
70d24d4204SJose E. Roman     info->nz_used        = isend[1];
71d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
725f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
73d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
74d24d4204SJose E. Roman     info->nz_used        = irecv[1];
75d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
765f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
77d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
78d24d4204SJose E. Roman     info->nz_used        = irecv[1];
79d24d4204SJose E. Roman   }
80d24d4204SJose E. Roman 
81d24d4204SJose E. Roman   info->nz_unneeded       = 0;
82d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
83d24d4204SJose E. Roman   info->mallocs           = 0;
84d24d4204SJose E. Roman   info->memory            = ((PetscObject)A)->mem;
85d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
86d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
87d24d4204SJose E. Roman   info->factor_mallocs    = 0;
88d24d4204SJose E. Roman   PetscFunctionReturn(0);
89d24d4204SJose E. Roman }
90d24d4204SJose E. Roman 
91d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92d24d4204SJose E. Roman {
93d24d4204SJose E. Roman   PetscFunctionBegin;
94d24d4204SJose E. Roman   switch (op) {
95d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATIONS:
96d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATION_ERR:
97d24d4204SJose E. Roman     case MAT_NEW_NONZERO_ALLOCATION_ERR:
98d24d4204SJose E. Roman     case MAT_SYMMETRIC:
99d24d4204SJose E. Roman     case MAT_SORTED_FULL:
100d24d4204SJose E. Roman     case MAT_HERMITIAN:
101d24d4204SJose E. Roman       break;
102d24d4204SJose E. Roman     default:
10398921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
104d24d4204SJose E. Roman   }
105d24d4204SJose E. Roman   PetscFunctionReturn(0);
106d24d4204SJose E. Roman }
107d24d4204SJose E. Roman 
108d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
109d24d4204SJose E. Roman {
110d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
111d24d4204SJose E. Roman   PetscInt       i,j;
112d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
113d24d4204SJose E. Roman 
114d24d4204SJose E. Roman   PetscFunctionBegin;
115d24d4204SJose E. Roman   for (i=0;i<nr;i++) {
116d24d4204SJose E. Roman     if (rows[i] < 0) continue;
1175f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(rows[i]+1,&gridx));
118d24d4204SJose E. Roman     for (j=0;j<nc;j++) {
119d24d4204SJose E. Roman       if (cols[j] < 0) continue;
1205f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBLASIntCast(cols[j]+1,&gcidx));
121d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
122d24d4204SJose E. Roman       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
123d24d4204SJose E. Roman         switch (imode) {
124d24d4204SJose E. Roman           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
125d24d4204SJose E. Roman           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
12698921bdaSJacob Faibussowitsch           default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
127d24d4204SJose E. Roman         }
128d24d4204SJose E. Roman       } else {
129*28b400f6SJacob Faibussowitsch         PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
130d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
1315f80ce2aSJacob Faibussowitsch         CHKERRQ(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES)));
132d24d4204SJose E. Roman       }
133d24d4204SJose E. Roman     }
134d24d4204SJose E. Roman   }
135d24d4204SJose E. Roman   PetscFunctionReturn(0);
136d24d4204SJose E. Roman }
137d24d4204SJose E. Roman 
138d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
139d24d4204SJose E. Roman {
140d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
141d24d4204SJose E. Roman   PetscScalar    *x2d,*y2d,alpha=1.0;
142d24d4204SJose E. Roman   const PetscInt *ranges;
143d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
144d24d4204SJose E. Roman 
145d24d4204SJose E. Roman   PetscFunctionBegin;
146d24d4204SJose E. Roman   if (transpose) {
147d24d4204SJose E. Roman 
148d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1495f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
1505f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
151d24d4204SJose E. Roman     xlld = PetscMax(1,A->rmap->n);
152d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
153d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
1545f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
1555f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
156d24d4204SJose E. Roman     ylld = 1;
157d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
158d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
159d24d4204SJose E. Roman 
160d24d4204SJose E. Roman     /* allocate 2d vectors */
161d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
162d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1635f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d));
164d24d4204SJose E. Roman     xlld = PetscMax(1,lszx);
165d24d4204SJose E. Roman 
166d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
167d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
168d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
169d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
170d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
171d24d4204SJose E. Roman 
172d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
173d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
174d24d4204SJose E. Roman 
175d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
176d24d4204SJose E. Roman     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
177d24d4204SJose E. Roman 
178d24d4204SJose E. Roman     /* call PBLAS subroutine */
179d24d4204SJose E. Roman     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
180d24d4204SJose E. Roman 
181d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
182d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
183d24d4204SJose E. Roman 
184d24d4204SJose E. Roman   } else {   /* non-transpose */
185d24d4204SJose E. Roman 
186d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1875f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
1885f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
189d24d4204SJose E. Roman     xlld = 1;
190d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
191d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
1925f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
1935f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
194d24d4204SJose E. Roman     ylld = PetscMax(1,A->rmap->n);
195d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
196d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
197d24d4204SJose E. Roman 
198d24d4204SJose E. Roman     /* allocate 2d vectors */
199d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
200d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
2015f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d));
202d24d4204SJose E. Roman     ylld = PetscMax(1,lszy);
203d24d4204SJose E. Roman 
204d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
205d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
206d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
207d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
208d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
209d24d4204SJose E. Roman 
210d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
211d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
212d24d4204SJose E. Roman 
213d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
214d24d4204SJose E. Roman     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
215d24d4204SJose E. Roman 
216d24d4204SJose E. Roman     /* call PBLAS subroutine */
217d24d4204SJose E. Roman     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
218d24d4204SJose E. Roman 
219d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
220d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
221d24d4204SJose E. Roman 
222d24d4204SJose E. Roman   }
2235f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(x2d,y2d));
224d24d4204SJose E. Roman   PetscFunctionReturn(0);
225d24d4204SJose E. Roman }
226d24d4204SJose E. Roman 
227d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
228d24d4204SJose E. Roman {
229d24d4204SJose E. Roman   const PetscScalar *xarray;
230d24d4204SJose E. Roman   PetscScalar       *yarray;
231d24d4204SJose E. Roman 
232d24d4204SJose E. Roman   PetscFunctionBegin;
2335f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArrayRead(x,&xarray));
2345f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(y,&yarray));
2355f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
2365f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArrayRead(x,&xarray));
2375f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(y,&yarray));
238d24d4204SJose E. Roman   PetscFunctionReturn(0);
239d24d4204SJose E. Roman }
240d24d4204SJose E. Roman 
241d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
242d24d4204SJose E. Roman {
243d24d4204SJose E. Roman   const PetscScalar *xarray;
244d24d4204SJose E. Roman   PetscScalar       *yarray;
245d24d4204SJose E. Roman 
246d24d4204SJose E. Roman   PetscFunctionBegin;
2475f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArrayRead(x,&xarray));
2485f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(y,&yarray));
2495f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
2505f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArrayRead(x,&xarray));
2515f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(y,&yarray));
252d24d4204SJose E. Roman   PetscFunctionReturn(0);
253d24d4204SJose E. Roman }
254d24d4204SJose E. Roman 
255d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
256d24d4204SJose E. Roman {
257d24d4204SJose E. Roman   const PetscScalar *xarray;
258d24d4204SJose E. Roman   PetscScalar       *zarray;
259d24d4204SJose E. Roman 
260d24d4204SJose E. Roman   PetscFunctionBegin;
2615f80ce2aSJacob Faibussowitsch   if (y != z) CHKERRQ(VecCopy(y,z));
2625f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArrayRead(x,&xarray));
2635f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(z,&zarray));
2645f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
2655f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArrayRead(x,&xarray));
2665f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(z,&zarray));
267d24d4204SJose E. Roman   PetscFunctionReturn(0);
268d24d4204SJose E. Roman }
269d24d4204SJose E. Roman 
270d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
271d24d4204SJose E. Roman {
272d24d4204SJose E. Roman   const PetscScalar *xarray;
273d24d4204SJose E. Roman   PetscScalar       *zarray;
274d24d4204SJose E. Roman 
275d24d4204SJose E. Roman   PetscFunctionBegin;
2765f80ce2aSJacob Faibussowitsch   if (y != z) CHKERRQ(VecCopy(y,z));
2775f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArrayRead(x,&xarray));
2785f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(z,&zarray));
2795f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
2805f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArrayRead(x,&xarray));
2815f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(z,&zarray));
282d24d4204SJose E. Roman   PetscFunctionReturn(0);
283d24d4204SJose E. Roman }
284d24d4204SJose E. Roman 
285d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
286d24d4204SJose E. Roman {
287d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
288d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
289d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
290d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
291d24d4204SJose E. Roman   PetscBLASInt  one=1;
292d24d4204SJose E. Roman 
293d24d4204SJose E. Roman   PetscFunctionBegin;
294d24d4204SJose E. Roman   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
295d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
296d24d4204SJose E. Roman   PetscFunctionReturn(0);
297d24d4204SJose E. Roman }
298d24d4204SJose E. Roman 
299d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
300d24d4204SJose E. Roman {
301d24d4204SJose E. Roman   PetscFunctionBegin;
3025f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
3035f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetType(C,MATSCALAPACK));
3045f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetUp(C));
305d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
306d24d4204SJose E. Roman   PetscFunctionReturn(0);
307d24d4204SJose E. Roman }
308d24d4204SJose E. Roman 
309d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
310d24d4204SJose E. Roman {
311d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
312d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
313d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
314d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
315d24d4204SJose E. Roman   PetscBLASInt  one=1;
316d24d4204SJose E. Roman 
317d24d4204SJose E. Roman   PetscFunctionBegin;
318d24d4204SJose E. Roman   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
319d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
320d24d4204SJose E. Roman   PetscFunctionReturn(0);
321d24d4204SJose E. Roman }
322d24d4204SJose E. Roman 
323d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
324d24d4204SJose E. Roman {
325d24d4204SJose E. Roman   PetscFunctionBegin;
3265f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
3275f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetType(C,MATSCALAPACK));
3285f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetUp(C));
329d24d4204SJose E. Roman   PetscFunctionReturn(0);
330d24d4204SJose E. Roman }
331d24d4204SJose E. Roman 
332d24d4204SJose E. Roman /* --------------------------------------- */
333d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
334d24d4204SJose E. Roman {
335d24d4204SJose E. Roman   PetscFunctionBegin;
336d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
337d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
338d24d4204SJose E. Roman   PetscFunctionReturn(0);
339d24d4204SJose E. Roman }
340d24d4204SJose E. Roman 
341d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
342d24d4204SJose E. Roman {
343d24d4204SJose E. Roman   PetscFunctionBegin;
344d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
345d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
346d24d4204SJose E. Roman   PetscFunctionReturn(0);
347d24d4204SJose E. Roman }
348d24d4204SJose E. Roman 
349d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
350d24d4204SJose E. Roman {
351d24d4204SJose E. Roman   Mat_Product    *product = C->product;
352d24d4204SJose E. Roman 
353d24d4204SJose E. Roman   PetscFunctionBegin;
354d24d4204SJose E. Roman   switch (product->type) {
355d24d4204SJose E. Roman     case MATPRODUCT_AB:
3565f80ce2aSJacob Faibussowitsch       CHKERRQ(MatProductSetFromOptions_ScaLAPACK_AB(C));
357d24d4204SJose E. Roman       break;
358d24d4204SJose E. Roman     case MATPRODUCT_ABt:
3595f80ce2aSJacob Faibussowitsch       CHKERRQ(MatProductSetFromOptions_ScaLAPACK_ABt(C));
360d24d4204SJose E. Roman       break;
36198921bdaSJacob Faibussowitsch     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
362d24d4204SJose E. Roman   }
363d24d4204SJose E. Roman   PetscFunctionReturn(0);
364d24d4204SJose E. Roman }
365d24d4204SJose E. Roman /* --------------------------------------- */
366d24d4204SJose E. Roman 
367d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
368d24d4204SJose E. Roman {
369d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
370d24d4204SJose E. Roman   PetscScalar       *darray,*d2d,v;
371d24d4204SJose E. Roman   const PetscInt    *ranges;
372d24d4204SJose E. Roman   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
373d24d4204SJose E. Roman 
374d24d4204SJose E. Roman   PetscFunctionBegin;
3755f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(D,&darray));
376d24d4204SJose E. Roman 
377d24d4204SJose E. Roman   if (A->rmap->N<=A->cmap->N) {   /* row version */
378d24d4204SJose E. Roman 
379d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
3805f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
3815f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
382d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
383d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
384d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
385d24d4204SJose E. Roman 
386d24d4204SJose E. Roman     /* allocate 2d vector */
387d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
3885f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCalloc1(lszd,&d2d));
389d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
390d24d4204SJose E. Roman 
391d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
392d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
393d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
394d24d4204SJose E. Roman 
395d24d4204SJose E. Roman     /* collect diagonal */
396d24d4204SJose E. Roman     for (j=1;j<=a->M;j++) {
397d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
398d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
399d24d4204SJose E. Roman     }
400d24d4204SJose E. Roman 
401d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
402d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
4035f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(d2d));
404d24d4204SJose E. Roman 
405d24d4204SJose E. Roman   } else {   /* column version */
406d24d4204SJose E. Roman 
407d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4085f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
4095f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
410d24d4204SJose E. Roman     dlld = 1;
411d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
412d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
413d24d4204SJose E. Roman 
414d24d4204SJose E. Roman     /* allocate 2d vector */
415d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
4165f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCalloc1(lszd,&d2d));
417d24d4204SJose E. Roman 
418d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
419d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
420d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
421d24d4204SJose E. Roman 
422d24d4204SJose E. Roman     /* collect diagonal */
423d24d4204SJose E. Roman     for (j=1;j<=a->N;j++) {
424d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
425d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
426d24d4204SJose E. Roman     }
427d24d4204SJose E. Roman 
428d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
429d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
4305f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(d2d));
431d24d4204SJose E. Roman   }
432d24d4204SJose E. Roman 
4335f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(D,&darray));
4345f80ce2aSJacob Faibussowitsch   CHKERRQ(VecAssemblyBegin(D));
4355f80ce2aSJacob Faibussowitsch   CHKERRQ(VecAssemblyEnd(D));
436d24d4204SJose E. Roman   PetscFunctionReturn(0);
437d24d4204SJose E. Roman }
438d24d4204SJose E. Roman 
439d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
440d24d4204SJose E. Roman {
441d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
442d24d4204SJose E. Roman   const PetscScalar *d;
443d24d4204SJose E. Roman   const PetscInt    *ranges;
444d24d4204SJose E. Roman   PetscScalar       *d2d;
445d24d4204SJose E. Roman   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
446d24d4204SJose E. Roman 
447d24d4204SJose E. Roman   PetscFunctionBegin;
448d24d4204SJose E. Roman   if (R) {
4495f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArrayRead(R,(const PetscScalar **)&d));
450d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4515f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
4525f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
453d24d4204SJose E. Roman     dlld = 1;
454d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
455d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
456d24d4204SJose E. Roman 
457d24d4204SJose E. Roman     /* allocate 2d vector */
458d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
4595f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCalloc1(lszd,&d2d));
460d24d4204SJose E. Roman 
461d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
462d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
463d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
464d24d4204SJose E. Roman 
465d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
466d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
467d24d4204SJose E. Roman 
468d24d4204SJose E. Roman     /* broadcast along process columns */
469d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
470d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
471d24d4204SJose E. Roman 
472d24d4204SJose E. Roman     /* local scaling */
473d24d4204SJose E. Roman     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
474d24d4204SJose E. Roman 
4755f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(d2d));
4765f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArrayRead(R,(const PetscScalar **)&d));
477d24d4204SJose E. Roman   }
478d24d4204SJose E. Roman   if (L) {
4795f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArrayRead(L,(const PetscScalar **)&d));
480d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4815f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
4825f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
483d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
484d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
485d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
486d24d4204SJose E. Roman 
487d24d4204SJose E. Roman     /* allocate 2d vector */
488d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
4895f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCalloc1(lszd,&d2d));
490d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
491d24d4204SJose E. Roman 
492d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
493d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
494d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
495d24d4204SJose E. Roman 
496d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
497d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
498d24d4204SJose E. Roman 
499d24d4204SJose E. Roman     /* broadcast along process rows */
500d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
501d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
502d24d4204SJose E. Roman 
503d24d4204SJose E. Roman     /* local scaling */
504d24d4204SJose E. Roman     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
505d24d4204SJose E. Roman 
5065f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(d2d));
5075f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArrayRead(L,(const PetscScalar **)&d));
508d24d4204SJose E. Roman   }
509d24d4204SJose E. Roman   PetscFunctionReturn(0);
510d24d4204SJose E. Roman }
511d24d4204SJose E. Roman 
512d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
513d24d4204SJose E. Roman {
514d24d4204SJose E. Roman   PetscFunctionBegin;
515d24d4204SJose E. Roman   *missing = PETSC_FALSE;
516d24d4204SJose E. Roman   PetscFunctionReturn(0);
517d24d4204SJose E. Roman }
518d24d4204SJose E. Roman 
519d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
520d24d4204SJose E. Roman {
521d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
522d24d4204SJose E. Roman   PetscBLASInt  n,one=1;
523d24d4204SJose E. Roman 
524d24d4204SJose E. Roman   PetscFunctionBegin;
525d24d4204SJose E. Roman   n = x->lld*x->locc;
526d24d4204SJose E. Roman   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
527d24d4204SJose E. Roman   PetscFunctionReturn(0);
528d24d4204SJose E. Roman }
529d24d4204SJose E. Roman 
530d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
531d24d4204SJose E. Roman {
532d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
533d24d4204SJose E. Roman   PetscBLASInt  i,n;
534d24d4204SJose E. Roman   PetscScalar   v;
535d24d4204SJose E. Roman 
536d24d4204SJose E. Roman   PetscFunctionBegin;
537d24d4204SJose E. Roman   n = PetscMin(x->M,x->N);
538d24d4204SJose E. Roman   for (i=1;i<=n;i++) {
539d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
540d24d4204SJose E. Roman     v += alpha;
541d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
542d24d4204SJose E. Roman   }
543d24d4204SJose E. Roman   PetscFunctionReturn(0);
544d24d4204SJose E. Roman }
545d24d4204SJose E. Roman 
546d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
547d24d4204SJose E. Roman {
548d24d4204SJose E. Roman   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
549d24d4204SJose E. Roman   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
550d24d4204SJose E. Roman   PetscBLASInt   one=1;
551d24d4204SJose E. Roman   PetscScalar    beta=1.0;
552d24d4204SJose E. Roman 
553d24d4204SJose E. Roman   PetscFunctionBegin;
554d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y,1,X,3);
555d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
5565f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectStateIncrease((PetscObject)Y));
557d24d4204SJose E. Roman   PetscFunctionReturn(0);
558d24d4204SJose E. Roman }
559d24d4204SJose E. Roman 
560d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
561d24d4204SJose E. Roman {
562d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
563d24d4204SJose E. Roman   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
564d24d4204SJose E. Roman 
565d24d4204SJose E. Roman   PetscFunctionBegin;
5665f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
5675f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectStateIncrease((PetscObject)B));
568d24d4204SJose E. Roman   PetscFunctionReturn(0);
569d24d4204SJose E. Roman }
570d24d4204SJose E. Roman 
571d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
572d24d4204SJose E. Roman {
573d24d4204SJose E. Roman   Mat            Bs;
574d24d4204SJose E. Roman   MPI_Comm       comm;
575d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
576d24d4204SJose E. Roman 
577d24d4204SJose E. Roman   PetscFunctionBegin;
5785f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
5795f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(comm,&Bs));
5805f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
5815f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetType(Bs,MATSCALAPACK));
582d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
583d24d4204SJose E. Roman   b->M    = a->M;
584d24d4204SJose E. Roman   b->N    = a->N;
585d24d4204SJose E. Roman   b->mb   = a->mb;
586d24d4204SJose E. Roman   b->nb   = a->nb;
587d24d4204SJose E. Roman   b->rsrc = a->rsrc;
588d24d4204SJose E. Roman   b->csrc = a->csrc;
5895f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetUp(Bs));
590d24d4204SJose E. Roman   *B = Bs;
591d24d4204SJose E. Roman   if (op == MAT_COPY_VALUES) {
5925f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
593d24d4204SJose E. Roman   }
594d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
595d24d4204SJose E. Roman   PetscFunctionReturn(0);
596d24d4204SJose E. Roman }
597d24d4204SJose E. Roman 
598d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
599d24d4204SJose E. Roman {
600d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
601d24d4204SJose E. Roman   Mat            Bs = *B;
602d24d4204SJose E. Roman   PetscBLASInt   one=1;
603d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
604d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
605d24d4204SJose E. Roman   PetscInt       i,j;
606d24d4204SJose E. Roman #endif
607d24d4204SJose E. Roman 
608d24d4204SJose E. Roman   PetscFunctionBegin;
609d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
6105f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
611d24d4204SJose E. Roman     *B = Bs;
612d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
613d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
614d24d4204SJose E. Roman   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
615d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
616d24d4204SJose E. Roman   /* undo conjugation */
617d24d4204SJose E. Roman   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
618d24d4204SJose E. Roman #endif
619d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
620d24d4204SJose E. Roman   PetscFunctionReturn(0);
621d24d4204SJose E. Roman }
622d24d4204SJose E. Roman 
623d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
624d24d4204SJose E. Roman {
625d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
626d24d4204SJose E. Roman   PetscInt      i,j;
627d24d4204SJose E. Roman 
628d24d4204SJose E. Roman   PetscFunctionBegin;
629d24d4204SJose E. Roman   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
630d24d4204SJose E. Roman   PetscFunctionReturn(0);
631d24d4204SJose E. Roman }
632d24d4204SJose E. Roman 
633d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
634d24d4204SJose E. Roman {
635d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
636d24d4204SJose E. Roman   Mat            Bs = *B;
637d24d4204SJose E. Roman   PetscBLASInt   one=1;
638d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
639d24d4204SJose E. Roman 
640d24d4204SJose E. Roman   PetscFunctionBegin;
641d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
6425f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
643d24d4204SJose E. Roman     *B = Bs;
644d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
645d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
646d24d4204SJose E. Roman   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
647d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
648d24d4204SJose E. Roman   PetscFunctionReturn(0);
649d24d4204SJose E. Roman }
650d24d4204SJose E. Roman 
651d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
652d24d4204SJose E. Roman {
653d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
654d24d4204SJose E. Roman   PetscScalar    *x,*x2d;
655d24d4204SJose E. Roman   const PetscInt *ranges;
656d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
657d24d4204SJose E. Roman 
658d24d4204SJose E. Roman   PetscFunctionBegin;
6595f80ce2aSJacob Faibussowitsch   CHKERRQ(VecCopy(B,X));
6605f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(X,&x));
661d24d4204SJose E. Roman 
662d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
6635f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
6645f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
665d24d4204SJose E. Roman   xlld = PetscMax(1,A->rmap->n);
666d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
667d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
668d24d4204SJose E. Roman 
669d24d4204SJose E. Roman   /* allocate 2d vector */
670d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
6715f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(lszx,&x2d));
672d24d4204SJose E. Roman   xlld = PetscMax(1,lszx);
673d24d4204SJose E. Roman 
674d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
675d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
676d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
677d24d4204SJose E. Roman 
678d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
679d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
680d24d4204SJose E. Roman 
681d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
682d24d4204SJose E. Roman   switch (A->factortype) {
683d24d4204SJose E. Roman     case MAT_FACTOR_LU:
684d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
685d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
686d24d4204SJose E. Roman       break;
687d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
688d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
689d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
690d24d4204SJose E. Roman       break;
691d24d4204SJose E. Roman     default:
692d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
693d24d4204SJose E. Roman   }
694d24d4204SJose E. Roman 
695d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
696d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
697d24d4204SJose E. Roman 
6985f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(x2d));
6995f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(X,&x));
700d24d4204SJose E. Roman   PetscFunctionReturn(0);
701d24d4204SJose E. Roman }
702d24d4204SJose E. Roman 
703d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
704d24d4204SJose E. Roman {
705d24d4204SJose E. Roman   PetscFunctionBegin;
7065f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSolve_ScaLAPACK(A,B,X));
7075f80ce2aSJacob Faibussowitsch   CHKERRQ(VecAXPY(X,1,Y));
708d24d4204SJose E. Roman   PetscFunctionReturn(0);
709d24d4204SJose E. Roman }
710d24d4204SJose E. Roman 
711d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
712d24d4204SJose E. Roman {
713d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
714d24d4204SJose E. Roman   PetscBool      flg1,flg2;
715d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
716d24d4204SJose E. Roman 
717d24d4204SJose E. Roman   PetscFunctionBegin;
7185f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
7195f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
7202c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!(flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
721d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B,1,X,2);
722d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)B->data;
723d24d4204SJose E. Roman   x = (Mat_ScaLAPACK*)X->data;
7245f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));
725d24d4204SJose E. Roman 
726d24d4204SJose E. Roman   switch (A->factortype) {
727d24d4204SJose E. Roman     case MAT_FACTOR_LU:
728d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
729d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
730d24d4204SJose E. Roman       break;
731d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
732d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
733d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
734d24d4204SJose E. Roman       break;
735d24d4204SJose E. Roman     default:
736d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
737d24d4204SJose E. Roman   }
738d24d4204SJose E. Roman   PetscFunctionReturn(0);
739d24d4204SJose E. Roman }
740d24d4204SJose E. Roman 
741d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
742d24d4204SJose E. Roman {
743d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
744d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
745d24d4204SJose E. Roman 
746d24d4204SJose E. Roman   PetscFunctionBegin;
747d24d4204SJose E. Roman   if (!a->pivots) {
7485f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(a->locr+a->mb,&a->pivots));
7495f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
750d24d4204SJose E. Roman   }
751d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
752d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf",info);
753d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
754d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
755d24d4204SJose E. Roman 
7565f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(A->solvertype));
7575f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
758d24d4204SJose E. Roman   PetscFunctionReturn(0);
759d24d4204SJose E. Roman }
760d24d4204SJose E. Roman 
761d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
762d24d4204SJose E. Roman {
763d24d4204SJose E. Roman   PetscFunctionBegin;
7645f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN));
7655f80ce2aSJacob Faibussowitsch   CHKERRQ(MatLUFactor_ScaLAPACK(F,0,0,info));
766d24d4204SJose E. Roman   PetscFunctionReturn(0);
767d24d4204SJose E. Roman }
768d24d4204SJose E. Roman 
769d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
770d24d4204SJose E. Roman {
771d24d4204SJose E. Roman   PetscFunctionBegin;
772d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
773d24d4204SJose E. Roman   PetscFunctionReturn(0);
774d24d4204SJose E. Roman }
775d24d4204SJose E. Roman 
776d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
777d24d4204SJose E. Roman {
778d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
779d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
780d24d4204SJose E. Roman 
781d24d4204SJose E. Roman   PetscFunctionBegin;
782d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
783d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf",info);
784d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
785d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
786d24d4204SJose E. Roman 
7875f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(A->solvertype));
7885f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
789d24d4204SJose E. Roman   PetscFunctionReturn(0);
790d24d4204SJose E. Roman }
791d24d4204SJose E. Roman 
792d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
793d24d4204SJose E. Roman {
794d24d4204SJose E. Roman   PetscFunctionBegin;
7955f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN));
7965f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCholeskyFactor_ScaLAPACK(F,0,info));
797d24d4204SJose E. Roman   PetscFunctionReturn(0);
798d24d4204SJose E. Roman }
799d24d4204SJose E. Roman 
800d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
801d24d4204SJose E. Roman {
802d24d4204SJose E. Roman   PetscFunctionBegin;
803d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
804d24d4204SJose E. Roman   PetscFunctionReturn(0);
805d24d4204SJose E. Roman }
806d24d4204SJose E. Roman 
807d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
808d24d4204SJose E. Roman {
809d24d4204SJose E. Roman   PetscFunctionBegin;
810d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
811d24d4204SJose E. Roman   PetscFunctionReturn(0);
812d24d4204SJose E. Roman }
813d24d4204SJose E. Roman 
814d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
815d24d4204SJose E. Roman {
816d24d4204SJose E. Roman   Mat            B;
81759172f18SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
818d24d4204SJose E. Roman 
819d24d4204SJose E. Roman   PetscFunctionBegin;
820d24d4204SJose E. Roman   /* Create the factorization matrix */
8215f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
82266e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
823d24d4204SJose E. Roman   B->factortype = ftype;
8245f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(B->solvertype));
8255f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));
826d24d4204SJose E. Roman 
8275f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
828d24d4204SJose E. Roman   *F = B;
829d24d4204SJose E. Roman   PetscFunctionReturn(0);
830d24d4204SJose E. Roman }
831d24d4204SJose E. Roman 
832d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
833d24d4204SJose E. Roman {
834d24d4204SJose E. Roman   PetscFunctionBegin;
8355f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
8365f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
837d24d4204SJose E. Roman   PetscFunctionReturn(0);
838d24d4204SJose E. Roman }
839d24d4204SJose E. Roman 
840d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
841d24d4204SJose E. Roman {
842d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
843d24d4204SJose E. Roman   PetscBLASInt   one=1,lwork=0;
844d24d4204SJose E. Roman   const char     *ntype;
845d68f4f38SPierre Jolivet   PetscScalar    *work=NULL,dummy;
846d24d4204SJose E. Roman 
847d24d4204SJose E. Roman   PetscFunctionBegin;
848d24d4204SJose E. Roman   switch (type) {
849d24d4204SJose E. Roman     case NORM_1:
850d24d4204SJose E. Roman       ntype = "1";
851d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
852d24d4204SJose E. Roman       break;
853d24d4204SJose E. Roman     case NORM_FROBENIUS:
854d24d4204SJose E. Roman       ntype = "F";
855d24d4204SJose E. Roman       work  = &dummy;
856d24d4204SJose E. Roman       break;
857d24d4204SJose E. Roman     case NORM_INFINITY:
858d24d4204SJose E. Roman       ntype = "I";
859d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
860d24d4204SJose E. Roman       break;
861d24d4204SJose E. Roman     default:
862d24d4204SJose E. Roman       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
863d24d4204SJose E. Roman   }
8645f80ce2aSJacob Faibussowitsch   if (lwork) CHKERRQ(PetscMalloc1(lwork,&work));
865d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
8665f80ce2aSJacob Faibussowitsch   if (lwork) CHKERRQ(PetscFree(work));
867d24d4204SJose E. Roman   PetscFunctionReturn(0);
868d24d4204SJose E. Roman }
869d24d4204SJose E. Roman 
870d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
871d24d4204SJose E. Roman {
872d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
873d24d4204SJose E. Roman 
874d24d4204SJose E. Roman   PetscFunctionBegin;
8755f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscArrayzero(a->loc,a->lld*a->locc));
876d24d4204SJose E. Roman   PetscFunctionReturn(0);
877d24d4204SJose E. Roman }
878d24d4204SJose E. Roman 
879d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
880d24d4204SJose E. Roman {
881d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
882d24d4204SJose E. Roman   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
883d24d4204SJose E. Roman 
884d24d4204SJose E. Roman   PetscFunctionBegin;
885d24d4204SJose E. Roman   if (rows) {
886d24d4204SJose E. Roman     n     = a->locr;
887d24d4204SJose E. Roman     nb    = a->mb;
888d24d4204SJose E. Roman     isrc  = a->rsrc;
889d24d4204SJose E. Roman     nproc = a->grid->nprow;
890d24d4204SJose E. Roman     iproc = a->grid->myrow;
8915f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(n,&idx));
892d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
8935f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
894d24d4204SJose E. Roman   }
895d24d4204SJose E. Roman   if (cols) {
896d24d4204SJose E. Roman     n     = a->locc;
897d24d4204SJose E. Roman     nb    = a->nb;
898d24d4204SJose E. Roman     isrc  = a->csrc;
899d24d4204SJose E. Roman     nproc = a->grid->npcol;
900d24d4204SJose E. Roman     iproc = a->grid->mycol;
9015f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(n,&idx));
902d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
9035f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
904d24d4204SJose E. Roman   }
905d24d4204SJose E. Roman   PetscFunctionReturn(0);
906d24d4204SJose E. Roman }
907d24d4204SJose E. Roman 
908d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
909d24d4204SJose E. Roman {
910d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
911d24d4204SJose E. Roman   Mat            Bmpi;
912d24d4204SJose E. Roman   MPI_Comm       comm;
9134b1a79daSJose E. Roman   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
9144b1a79daSJose E. Roman   const PetscInt *ranges,*branges,*cwork;
9154b1a79daSJose E. Roman   const PetscScalar *vwork;
916d24d4204SJose E. Roman   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
917d24d4204SJose E. Roman   PetscScalar    *barray;
9184b1a79daSJose E. Roman   PetscBool      differ=PETSC_FALSE;
9194b1a79daSJose E. Roman   PetscMPIInt    size;
920d24d4204SJose E. Roman 
921d24d4204SJose E. Roman   PetscFunctionBegin;
9225f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
9235f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
9244b1a79daSJose E. Roman 
9254b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9265f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_size(comm,&size));
9275f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges((*B)->rmap,&branges));
9284b1a79daSJose E. Roman     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
9294b1a79daSJose E. Roman   }
9304b1a79daSJose E. Roman 
9314b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
9325f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreate(comm,&Bmpi));
9334b1a79daSJose E. Roman     m = PETSC_DECIDE;
9345f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
9354b1a79daSJose E. Roman     n = PETSC_DECIDE;
9365f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
9375f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetSizes(Bmpi,m,n,M,N));
9385f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetType(Bmpi,MATDENSE));
9395f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetUp(Bmpi));
9404b1a79daSJose E. Roman 
9414b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9425f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
9434b1a79daSJose E. Roman     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
9444b1a79daSJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
9454b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
9464b1a79daSJose E. Roman 
9474b1a79daSJose E. Roman     /* redistribute matrix */
9485f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDenseGetArray(Bmpi,&barray));
9494b1a79daSJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
9505f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDenseRestoreArray(Bmpi,&barray));
9515f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
9525f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
9534b1a79daSJose E. Roman 
9544b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
9555f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(Bmpi,&rstart,&rend));
9564b1a79daSJose E. Roman     for (i=rstart;i<rend;i++) {
9575f80ce2aSJacob Faibussowitsch       CHKERRQ(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
9585f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
9595f80ce2aSJacob Faibussowitsch       CHKERRQ(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
9604b1a79daSJose E. Roman     }
9615f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
9625f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
9635f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDestroy(&Bmpi));
9644b1a79daSJose E. Roman 
9654b1a79daSJose E. Roman   } else {  /* normal cases */
966d24d4204SJose E. Roman 
967d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
968d24d4204SJose E. Roman     else {
9695f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreate(comm,&Bmpi));
97092c846b4SJose E. Roman       m = PETSC_DECIDE;
9715f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
97292c846b4SJose E. Roman       n = PETSC_DECIDE;
9735f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
9745f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetSizes(Bmpi,m,n,M,N));
9755f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetType(Bmpi,MATDENSE));
9765f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetUp(Bmpi));
977d24d4204SJose E. Roman     }
978d24d4204SJose E. Roman 
979d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9805f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
981d24d4204SJose E. Roman     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
982d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
983d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
984d24d4204SJose E. Roman 
985d24d4204SJose E. Roman     /* redistribute matrix */
9865f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDenseGetArray(Bmpi,&barray));
987d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
9885f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDenseRestoreArray(Bmpi,&barray));
989d24d4204SJose E. Roman 
9905f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
9915f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
992d24d4204SJose E. Roman     if (reuse == MAT_INPLACE_MATRIX) {
9935f80ce2aSJacob Faibussowitsch       CHKERRQ(MatHeaderReplace(A,&Bmpi));
994d24d4204SJose E. Roman     } else *B = Bmpi;
9954b1a79daSJose E. Roman   }
996d24d4204SJose E. Roman   PetscFunctionReturn(0);
997d24d4204SJose E. Roman }
998d24d4204SJose E. Roman 
999d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1000d24d4204SJose E. Roman {
1001d24d4204SJose E. Roman   Mat_ScaLAPACK  *b;
1002d24d4204SJose E. Roman   Mat            Bmpi;
1003d24d4204SJose E. Roman   MPI_Comm       comm;
100492c846b4SJose E. Roman   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1005d24d4204SJose E. Roman   const PetscInt *ranges;
1006d24d4204SJose E. Roman   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1007d24d4204SJose E. Roman   PetscScalar    *aarray;
10084e8b52a1SJose E. Roman   PetscInt       lda;
1009d24d4204SJose E. Roman 
1010d24d4204SJose E. Roman   PetscFunctionBegin;
10115f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
1012d24d4204SJose E. Roman 
1013d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1014d24d4204SJose E. Roman   else {
10155f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreate(comm,&Bmpi));
101692c846b4SJose E. Roman     m = PETSC_DECIDE;
10175f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
101892c846b4SJose E. Roman     n = PETSC_DECIDE;
10195f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
10205f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetSizes(Bmpi,m,n,M,N));
10215f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetType(Bmpi,MATSCALAPACK));
10225f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetUp(Bmpi));
1023d24d4204SJose E. Roman   }
1024d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bmpi->data;
1025d24d4204SJose E. Roman 
1026d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for A (1d block distribution) */
10275f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
10285f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
10295f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDenseGetLDA(A,&lda));
10304e8b52a1SJose E. Roman   lld = PetscMax(lda,1);  /* local leading dimension */
1031d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1032d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1033d24d4204SJose E. Roman 
1034d24d4204SJose E. Roman   /* redistribute matrix */
10355f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDenseGetArray(A,&aarray));
1036d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
10375f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDenseRestoreArray(A,&aarray));
1038d24d4204SJose E. Roman 
10395f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
10405f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
1041d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
10425f80ce2aSJacob Faibussowitsch     CHKERRQ(MatHeaderReplace(A,&Bmpi));
1043d24d4204SJose E. Roman   } else *B = Bmpi;
1044d24d4204SJose E. Roman   PetscFunctionReturn(0);
1045d24d4204SJose E. Roman }
1046d24d4204SJose E. Roman 
1047d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1048d24d4204SJose E. Roman {
1049d24d4204SJose E. Roman   Mat               mat_scal;
1050d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1051d24d4204SJose E. Roman   const PetscInt    *cols;
1052d24d4204SJose E. Roman   const PetscScalar *vals;
1053d24d4204SJose E. Roman 
1054d24d4204SJose E. Roman   PetscFunctionBegin;
1055d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1056d24d4204SJose E. Roman     mat_scal = *newmat;
10575f80ce2aSJacob Faibussowitsch     CHKERRQ(MatZeroEntries(mat_scal));
1058d24d4204SJose E. Roman   } else {
10595f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1060d24d4204SJose E. Roman     m = PETSC_DECIDE;
10615f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1062d24d4204SJose E. Roman     n = PETSC_DECIDE;
10635f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
10645f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetSizes(mat_scal,m,n,M,N));
10655f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetType(mat_scal,MATSCALAPACK));
10665f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetUp(mat_scal));
1067d24d4204SJose E. Roman   }
1068d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
10695f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals));
10705f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
10715f80ce2aSJacob Faibussowitsch     CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals));
1072d24d4204SJose E. Roman   }
10735f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
10745f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1075d24d4204SJose E. Roman 
10765f80ce2aSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal));
1077d24d4204SJose E. Roman   else *newmat = mat_scal;
1078d24d4204SJose E. Roman   PetscFunctionReturn(0);
1079d24d4204SJose E. Roman }
1080d24d4204SJose E. Roman 
1081d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1082d24d4204SJose E. Roman {
1083d24d4204SJose E. Roman   Mat               mat_scal;
1084d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1085d24d4204SJose E. Roman   const PetscInt    *cols;
1086d24d4204SJose E. Roman   const PetscScalar *vals;
1087d24d4204SJose E. Roman   PetscScalar       v;
1088d24d4204SJose E. Roman 
1089d24d4204SJose E. Roman   PetscFunctionBegin;
1090d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1091d24d4204SJose E. Roman     mat_scal = *newmat;
10925f80ce2aSJacob Faibussowitsch     CHKERRQ(MatZeroEntries(mat_scal));
1093d24d4204SJose E. Roman   } else {
10945f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1095d24d4204SJose E. Roman     m = PETSC_DECIDE;
10965f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1097d24d4204SJose E. Roman     n = PETSC_DECIDE;
10985f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
10995f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetSizes(mat_scal,m,n,M,N));
11005f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetType(mat_scal,MATSCALAPACK));
11015f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetUp(mat_scal));
1102d24d4204SJose E. Roman   }
11035f80ce2aSJacob Faibussowitsch   CHKERRQ(MatGetRowUpperTriangular(A));
1104d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
11055f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals));
11065f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
1107d24d4204SJose E. Roman     for (j=0;j<ncols;j++) { /* lower triangular part */
1108d24d4204SJose E. Roman       if (cols[j] == row) continue;
1109d24d4204SJose E. Roman       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
11105f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
1111d24d4204SJose E. Roman     }
11125f80ce2aSJacob Faibussowitsch     CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals));
1113d24d4204SJose E. Roman   }
11145f80ce2aSJacob Faibussowitsch   CHKERRQ(MatRestoreRowUpperTriangular(A));
11155f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
11165f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1117d24d4204SJose E. Roman 
11185f80ce2aSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal));
1119d24d4204SJose E. Roman   else *newmat = mat_scal;
1120d24d4204SJose E. Roman   PetscFunctionReturn(0);
1121d24d4204SJose E. Roman }
1122d24d4204SJose E. Roman 
1123d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1124d24d4204SJose E. Roman {
1125d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1126d24d4204SJose E. Roman   PetscInt       sz=0;
1127d24d4204SJose E. Roman 
1128d24d4204SJose E. Roman   PetscFunctionBegin;
11295f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutSetUp(A->rmap));
11305f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutSetUp(A->cmap));
1131d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1132d24d4204SJose E. Roman 
11335f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(a->loc));
11345f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscIntMultError(a->lld,a->locc,&sz));
11355f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc1(sz,&a->loc));
11365f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));
1137d24d4204SJose E. Roman 
1138d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
1139d24d4204SJose E. Roman   PetscFunctionReturn(0);
1140d24d4204SJose E. Roman }
1141d24d4204SJose E. Roman 
1142d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1143d24d4204SJose E. Roman {
1144d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1145d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1146d24d4204SJose E. Roman   PetscBool          flg;
1147d24d4204SJose E. Roman   MPI_Comm           icomm;
1148d24d4204SJose E. Roman 
1149d24d4204SJose E. Roman   PetscFunctionBegin;
11505f80ce2aSJacob Faibussowitsch   CHKERRQ(MatStashDestroy_Private(&A->stash));
11515f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(a->loc));
11525f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(a->pivots));
11535f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
11545f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1155d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1156d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1157d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1158d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
11595f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(grid));
11605f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
1161d24d4204SJose E. Roman   }
11625f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCommDestroy(&icomm));
11635f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
11645f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
11655f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
11665f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
11675f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(A->data));
1168d24d4204SJose E. Roman   PetscFunctionReturn(0);
1169d24d4204SJose E. Roman }
1170d24d4204SJose E. Roman 
11719fbee547SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1172d24d4204SJose E. Roman {
1173d24d4204SJose E. Roman   const PetscInt *ranges;
1174d24d4204SJose E. Roman   PetscMPIInt    size;
1175d24d4204SJose E. Roman   PetscInt       i,n;
1176d24d4204SJose E. Roman 
1177d24d4204SJose E. Roman   PetscFunctionBegin;
11785f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(map->comm,&size));
1179d24d4204SJose E. Roman   if (size>2) {
11805f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscLayoutGetRanges(map,&ranges));
1181d24d4204SJose E. Roman     n = ranges[1]-ranges[0];
1182d24d4204SJose E. Roman     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
11832c71b3e2SJacob Faibussowitsch     PetscCheckFalse(i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1184d24d4204SJose E. Roman   }
1185d24d4204SJose E. Roman   PetscFunctionReturn(0);
1186d24d4204SJose E. Roman }
1187d24d4204SJose E. Roman 
1188d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1189d24d4204SJose E. Roman {
1190d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1191d24d4204SJose E. Roman   PetscBLASInt   info=0;
1192d24d4204SJose E. Roman 
1193d24d4204SJose E. Roman   PetscFunctionBegin;
11945f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutSetUp(A->rmap));
11955f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutSetUp(A->cmap));
1196d24d4204SJose E. Roman 
1197d24d4204SJose E. Roman   /* check that the layout is as enforced by MatCreateScaLAPACK */
11985f80ce2aSJacob Faibussowitsch   CHKERRQ(MatScaLAPACKCheckLayout(A->rmap));
11995f80ce2aSJacob Faibussowitsch   CHKERRQ(MatScaLAPACKCheckLayout(A->cmap));
1200d24d4204SJose E. Roman 
1201d24d4204SJose E. Roman   /* compute local sizes */
12025f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(A->rmap->N,&a->M));
12035f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(A->cmap->N,&a->N));
1204d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1205d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1206d24d4204SJose E. Roman   a->lld  = PetscMax(1,a->locr);
1207d24d4204SJose E. Roman 
1208d24d4204SJose E. Roman   /* allocate local array */
12095f80ce2aSJacob Faibussowitsch   CHKERRQ(MatScaLAPACKSetPreallocation(A));
1210d24d4204SJose E. Roman 
1211d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1212d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1213d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1214d24d4204SJose E. Roman   PetscFunctionReturn(0);
1215d24d4204SJose E. Roman }
1216d24d4204SJose E. Roman 
1217d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1218d24d4204SJose E. Roman {
1219d24d4204SJose E. Roman   PetscInt       nstash,reallocs;
1220d24d4204SJose E. Roman 
1221d24d4204SJose E. Roman   PetscFunctionBegin;
1222d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
12235f80ce2aSJacob Faibussowitsch   CHKERRQ(MatStashScatterBegin_Private(A,&A->stash,NULL));
12245f80ce2aSJacob Faibussowitsch   CHKERRQ(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
12255f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
1226d24d4204SJose E. Roman   PetscFunctionReturn(0);
1227d24d4204SJose E. Roman }
1228d24d4204SJose E. Roman 
1229d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1230d24d4204SJose E. Roman {
1231d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1232d24d4204SJose E. Roman   PetscMPIInt    n;
1233d24d4204SJose E. Roman   PetscInt       i,flg,*row,*col;
1234d24d4204SJose E. Roman   PetscScalar    *val;
1235d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1236d24d4204SJose E. Roman 
1237d24d4204SJose E. Roman   PetscFunctionBegin;
1238d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1239d24d4204SJose E. Roman   while (1) {
12405f80ce2aSJacob Faibussowitsch     CHKERRQ(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
1241d24d4204SJose E. Roman     if (!flg) break;
1242d24d4204SJose E. Roman     for (i=0;i<n;i++) {
12435f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBLASIntCast(row[i]+1,&gridx));
12445f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBLASIntCast(col[i]+1,&gcidx));
1245d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
12462c71b3e2SJacob Faibussowitsch       PetscCheckFalse(rsrc!=a->grid->myrow || csrc!=a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
1247d24d4204SJose E. Roman       switch (A->insertmode) {
1248d24d4204SJose E. Roman         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1249d24d4204SJose E. Roman         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
125098921bdaSJacob Faibussowitsch         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1251d24d4204SJose E. Roman       }
1252d24d4204SJose E. Roman     }
1253d24d4204SJose E. Roman   }
12545f80ce2aSJacob Faibussowitsch   CHKERRQ(MatStashScatterEnd_Private(&A->stash));
1255d24d4204SJose E. Roman   PetscFunctionReturn(0);
1256d24d4204SJose E. Roman }
1257d24d4204SJose E. Roman 
1258d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1259d24d4204SJose E. Roman {
1260d24d4204SJose E. Roman   Mat            Adense,As;
1261d24d4204SJose E. Roman   MPI_Comm       comm;
1262d24d4204SJose E. Roman 
1263d24d4204SJose E. Roman   PetscFunctionBegin;
12645f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)newMat,&comm));
12655f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(comm,&Adense));
12665f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetType(Adense,MATDENSE));
12675f80ce2aSJacob Faibussowitsch   CHKERRQ(MatLoad(Adense,viewer));
12685f80ce2aSJacob Faibussowitsch   CHKERRQ(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
12695f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&Adense));
12705f80ce2aSJacob Faibussowitsch   CHKERRQ(MatHeaderReplace(newMat,&As));
1271d24d4204SJose E. Roman   PetscFunctionReturn(0);
1272d24d4204SJose E. Roman }
1273d24d4204SJose E. Roman 
1274d24d4204SJose E. Roman /* -------------------------------------------------------------------*/
1275d24d4204SJose E. Roman static struct _MatOps MatOps_Values = {
1276d24d4204SJose E. Roman        MatSetValues_ScaLAPACK,
1277d24d4204SJose E. Roman        0,
1278d24d4204SJose E. Roman        0,
1279d24d4204SJose E. Roman        MatMult_ScaLAPACK,
1280d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK,
1281d24d4204SJose E. Roman        MatMultTranspose_ScaLAPACK,
1282d24d4204SJose E. Roman        MatMultTransposeAdd_ScaLAPACK,
1283d24d4204SJose E. Roman        MatSolve_ScaLAPACK,
1284d24d4204SJose E. Roman        MatSolveAdd_ScaLAPACK,
1285d24d4204SJose E. Roman        0,
1286d24d4204SJose E. Roman /*10*/ 0,
1287d24d4204SJose E. Roman        MatLUFactor_ScaLAPACK,
1288d24d4204SJose E. Roman        MatCholeskyFactor_ScaLAPACK,
1289d24d4204SJose E. Roman        0,
1290d24d4204SJose E. Roman        MatTranspose_ScaLAPACK,
1291d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK,
1292d24d4204SJose E. Roman        0,
1293d24d4204SJose E. Roman        MatGetDiagonal_ScaLAPACK,
1294d24d4204SJose E. Roman        MatDiagonalScale_ScaLAPACK,
1295d24d4204SJose E. Roman        MatNorm_ScaLAPACK,
1296d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK,
1297d24d4204SJose E. Roman        MatAssemblyEnd_ScaLAPACK,
1298d24d4204SJose E. Roman        MatSetOption_ScaLAPACK,
1299d24d4204SJose E. Roman        MatZeroEntries_ScaLAPACK,
1300d24d4204SJose E. Roman /*24*/ 0,
1301d24d4204SJose E. Roman        MatLUFactorSymbolic_ScaLAPACK,
1302d24d4204SJose E. Roman        MatLUFactorNumeric_ScaLAPACK,
1303d24d4204SJose E. Roman        MatCholeskyFactorSymbolic_ScaLAPACK,
1304d24d4204SJose E. Roman        MatCholeskyFactorNumeric_ScaLAPACK,
1305d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK,
1306d24d4204SJose E. Roman        0,
1307d24d4204SJose E. Roman        0,
1308d24d4204SJose E. Roman        0,
1309d24d4204SJose E. Roman        0,
1310d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK,
1311d24d4204SJose E. Roman        0,
1312d24d4204SJose E. Roman        0,
1313d24d4204SJose E. Roman        0,
1314d24d4204SJose E. Roman        0,
1315d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK,
1316d24d4204SJose E. Roman        0,
1317d24d4204SJose E. Roman        0,
1318d24d4204SJose E. Roman        0,
1319d24d4204SJose E. Roman        MatCopy_ScaLAPACK,
1320d24d4204SJose E. Roman /*44*/ 0,
1321d24d4204SJose E. Roman        MatScale_ScaLAPACK,
1322d24d4204SJose E. Roman        MatShift_ScaLAPACK,
1323d24d4204SJose E. Roman        0,
1324d24d4204SJose E. Roman        0,
1325d24d4204SJose E. Roman /*49*/ 0,
1326d24d4204SJose E. Roman        0,
1327d24d4204SJose E. Roman        0,
1328d24d4204SJose E. Roman        0,
1329d24d4204SJose E. Roman        0,
1330d24d4204SJose E. Roman /*54*/ 0,
1331d24d4204SJose E. Roman        0,
1332d24d4204SJose E. Roman        0,
1333d24d4204SJose E. Roman        0,
1334d24d4204SJose E. Roman        0,
1335d24d4204SJose E. Roman /*59*/ 0,
1336d24d4204SJose E. Roman        MatDestroy_ScaLAPACK,
1337d24d4204SJose E. Roman        MatView_ScaLAPACK,
1338d24d4204SJose E. Roman        0,
1339d24d4204SJose E. Roman        0,
1340d24d4204SJose E. Roman /*64*/ 0,
1341d24d4204SJose E. Roman        0,
1342d24d4204SJose E. Roman        0,
1343d24d4204SJose E. Roman        0,
1344d24d4204SJose E. Roman        0,
1345d24d4204SJose E. Roman /*69*/ 0,
1346d24d4204SJose E. Roman        0,
1347d24d4204SJose E. Roman        MatConvert_ScaLAPACK_Dense,
1348d24d4204SJose E. Roman        0,
1349d24d4204SJose E. Roman        0,
1350d24d4204SJose E. Roman /*74*/ 0,
1351d24d4204SJose E. Roman        0,
1352d24d4204SJose E. Roman        0,
1353d24d4204SJose E. Roman        0,
1354d24d4204SJose E. Roman        0,
1355d24d4204SJose E. Roman /*79*/ 0,
1356d24d4204SJose E. Roman        0,
1357d24d4204SJose E. Roman        0,
1358d24d4204SJose E. Roman        0,
1359d24d4204SJose E. Roman        MatLoad_ScaLAPACK,
1360d24d4204SJose E. Roman /*84*/ 0,
1361d24d4204SJose E. Roman        0,
1362d24d4204SJose E. Roman        0,
1363d24d4204SJose E. Roman        0,
1364d24d4204SJose E. Roman        0,
1365d24d4204SJose E. Roman /*89*/ 0,
1366d24d4204SJose E. Roman        0,
1367d24d4204SJose E. Roman        MatMatMultNumeric_ScaLAPACK,
1368d24d4204SJose E. Roman        0,
1369d24d4204SJose E. Roman        0,
1370d24d4204SJose E. Roman /*94*/ 0,
1371d24d4204SJose E. Roman        0,
1372d24d4204SJose E. Roman        0,
1373d24d4204SJose E. Roman        MatMatTransposeMultNumeric_ScaLAPACK,
1374d24d4204SJose E. Roman        0,
1375d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK,
1376d24d4204SJose E. Roman        0,
1377d24d4204SJose E. Roman        0,
1378d24d4204SJose E. Roman        MatConjugate_ScaLAPACK,
1379d24d4204SJose E. Roman        0,
1380d24d4204SJose E. Roman /*104*/0,
1381d24d4204SJose E. Roman        0,
1382d24d4204SJose E. Roman        0,
1383d24d4204SJose E. Roman        0,
1384d24d4204SJose E. Roman        0,
1385d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK,
1386d24d4204SJose E. Roman        0,
1387d24d4204SJose E. Roman        0,
1388d24d4204SJose E. Roman        0,
1389d24d4204SJose E. Roman        MatMissingDiagonal_ScaLAPACK,
1390d24d4204SJose E. Roman /*114*/0,
1391d24d4204SJose E. Roman        0,
1392d24d4204SJose E. Roman        0,
1393d24d4204SJose E. Roman        0,
1394d24d4204SJose E. Roman        0,
1395d24d4204SJose E. Roman /*119*/0,
1396d24d4204SJose E. Roman        MatHermitianTranspose_ScaLAPACK,
1397d24d4204SJose E. Roman        0,
1398d24d4204SJose E. Roman        0,
1399d24d4204SJose E. Roman        0,
1400d24d4204SJose E. Roman /*124*/0,
1401d24d4204SJose E. Roman        0,
1402d24d4204SJose E. Roman        0,
1403d24d4204SJose E. Roman        0,
1404d24d4204SJose E. Roman        0,
1405d24d4204SJose E. Roman /*129*/0,
1406d24d4204SJose E. Roman        0,
1407d24d4204SJose E. Roman        0,
1408d24d4204SJose E. Roman        0,
1409d24d4204SJose E. Roman        0,
1410d24d4204SJose E. Roman /*134*/0,
1411d24d4204SJose E. Roman        0,
1412d24d4204SJose E. Roman        0,
1413d24d4204SJose E. Roman        0,
1414d24d4204SJose E. Roman        0,
1415d24d4204SJose E. Roman        0,
1416d24d4204SJose E. Roman /*140*/0,
1417d24d4204SJose E. Roman        0,
1418d24d4204SJose E. Roman        0,
1419d24d4204SJose E. Roman        0,
1420d24d4204SJose E. Roman        0,
1421d24d4204SJose E. Roman /*145*/0,
1422d24d4204SJose E. Roman        0,
1423d24d4204SJose E. Roman        0
1424d24d4204SJose E. Roman };
1425d24d4204SJose E. Roman 
1426d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1427d24d4204SJose E. Roman {
1428d24d4204SJose E. Roman   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1429d24d4204SJose E. Roman   PetscInt           size=stash->size,nsends;
1430d24d4204SJose E. Roman   PetscInt           count,*sindices,**rindices,i,j,l;
1431d24d4204SJose E. Roman   PetscScalar        **rvalues,*svalues;
1432d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1433d24d4204SJose E. Roman   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1434d24d4204SJose E. Roman   PetscMPIInt        *sizes,*nlengths,nreceives;
1435d24d4204SJose E. Roman   PetscInt           *sp_idx,*sp_idy;
1436d24d4204SJose E. Roman   PetscScalar        *sp_val;
1437d24d4204SJose E. Roman   PetscMatStashSpace space,space_next;
1438d24d4204SJose E. Roman   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1439d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1440d24d4204SJose E. Roman 
1441d24d4204SJose E. Roman   PetscFunctionBegin;
1442d24d4204SJose E. Roman   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1443d24d4204SJose E. Roman     InsertMode addv;
14445f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
14452c71b3e2SJacob Faibussowitsch     PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1446d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1447d24d4204SJose E. Roman   }
1448d24d4204SJose E. Roman 
1449d24d4204SJose E. Roman   bs2 = stash->bs*stash->bs;
1450d24d4204SJose E. Roman 
1451d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
14525f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc1(size,&nlengths));
14535f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(stash->n+1,&owner));
1454d24d4204SJose E. Roman 
1455d24d4204SJose E. Roman   i     = j    = 0;
1456d24d4204SJose E. Roman   space = stash->space_head;
1457d24d4204SJose E. Roman   while (space) {
1458d24d4204SJose E. Roman     space_next = space->next;
1459d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
14605f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBLASIntCast(space->idx[l]+1,&gridx));
14615f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBLASIntCast(space->idy[l]+1,&gcidx));
1462d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1463d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1464d24d4204SJose E. Roman       nlengths[j]++; owner[i] = j;
1465d24d4204SJose E. Roman       i++;
1466d24d4204SJose E. Roman     }
1467d24d4204SJose E. Roman     space = space_next;
1468d24d4204SJose E. Roman   }
1469d24d4204SJose E. Roman 
1470d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
14715f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc1(size,&sizes));
1472d24d4204SJose E. Roman   for (i=0, nsends=0; i<size; i++) {
1473d24d4204SJose E. Roman     if (nlengths[i]) {
1474d24d4204SJose E. Roman       sizes[i] = 1; nsends++;
1475d24d4204SJose E. Roman     }
1476d24d4204SJose E. Roman   }
1477d24d4204SJose E. Roman 
1478d24d4204SJose E. Roman   {PetscMPIInt *onodes,*olengths;
1479d24d4204SJose E. Roman    /* Determine the number of messages to expect, their lengths, from from-ids */
14805f80ce2aSJacob Faibussowitsch    CHKERRQ(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
14815f80ce2aSJacob Faibussowitsch    CHKERRQ(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
1482d24d4204SJose E. Roman    /* since clubbing row,col - lengths are multiplied by 2 */
1483d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] *=2;
14845f80ce2aSJacob Faibussowitsch    CHKERRQ(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
1485d24d4204SJose E. Roman    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1486d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
14875f80ce2aSJacob Faibussowitsch    CHKERRQ(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
14885f80ce2aSJacob Faibussowitsch    CHKERRQ(PetscFree(onodes));
14895f80ce2aSJacob Faibussowitsch    CHKERRQ(PetscFree(olengths));}
1490d24d4204SJose E. Roman 
1491d24d4204SJose E. Roman   /* do sends:
1492d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1493d24d4204SJose E. Roman          the ith processor
1494d24d4204SJose E. Roman   */
14955f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
14965f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(2*nsends,&send_waits));
14975f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(size,&startv,size,&starti));
1498d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
1499d24d4204SJose E. Roman   startv[0] = 0; starti[0] = 0;
1500d24d4204SJose E. Roman   for (i=1; i<size; i++) {
1501d24d4204SJose E. Roman     startv[i] = startv[i-1] + nlengths[i-1];
1502d24d4204SJose E. Roman     starti[i] = starti[i-1] + 2*nlengths[i-1];
1503d24d4204SJose E. Roman   }
1504d24d4204SJose E. Roman 
1505d24d4204SJose E. Roman   i     = 0;
1506d24d4204SJose E. Roman   space = stash->space_head;
1507d24d4204SJose E. Roman   while (space) {
1508d24d4204SJose E. Roman     space_next = space->next;
1509d24d4204SJose E. Roman     sp_idx     = space->idx;
1510d24d4204SJose E. Roman     sp_idy     = space->idy;
1511d24d4204SJose E. Roman     sp_val     = space->val;
1512d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
1513d24d4204SJose E. Roman       j = owner[i];
1514d24d4204SJose E. Roman       if (bs2 == 1) {
1515d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1516d24d4204SJose E. Roman       } else {
1517d24d4204SJose E. Roman         PetscInt    k;
1518d24d4204SJose E. Roman         PetscScalar *buf1,*buf2;
1519d24d4204SJose E. Roman         buf1 = svalues+bs2*startv[j];
1520d24d4204SJose E. Roman         buf2 = space->val + bs2*l;
1521d24d4204SJose E. Roman         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1522d24d4204SJose E. Roman       }
1523d24d4204SJose E. Roman       sindices[starti[j]]             = sp_idx[l];
1524d24d4204SJose E. Roman       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1525d24d4204SJose E. Roman       startv[j]++;
1526d24d4204SJose E. Roman       starti[j]++;
1527d24d4204SJose E. Roman       i++;
1528d24d4204SJose E. Roman     }
1529d24d4204SJose E. Roman     space = space_next;
1530d24d4204SJose E. Roman   }
1531d24d4204SJose E. Roman   startv[0] = 0;
1532d24d4204SJose E. Roman   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1533d24d4204SJose E. Roman 
1534d24d4204SJose E. Roman   for (i=0,count=0; i<size; i++) {
1535d24d4204SJose E. Roman     if (sizes[i]) {
15365f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
15375f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
1538d24d4204SJose E. Roman     }
1539d24d4204SJose E. Roman   }
1540d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
15415f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
1542d24d4204SJose E. Roman   for (i=0; i<size; i++) {
1543d24d4204SJose E. Roman     if (sizes[i]) {
15445f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
1545d24d4204SJose E. Roman     }
1546d24d4204SJose E. Roman   }
1547d24d4204SJose E. Roman #endif
15485f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(nlengths));
15495f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(owner));
15505f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(startv,starti));
15515f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sizes));
1552d24d4204SJose E. Roman 
1553d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
15545f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(2*nreceives,&recv_waits));
1555d24d4204SJose E. Roman 
1556d24d4204SJose E. Roman   for (i=0; i<nreceives; i++) {
1557d24d4204SJose E. Roman     recv_waits[2*i]   = recv_waits1[i];
1558d24d4204SJose E. Roman     recv_waits[2*i+1] = recv_waits2[i];
1559d24d4204SJose E. Roman   }
1560d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1561d24d4204SJose E. Roman 
15625f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(recv_waits1));
15635f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(recv_waits2));
1564d24d4204SJose E. Roman 
1565d24d4204SJose E. Roman   stash->svalues         = svalues;
1566d24d4204SJose E. Roman   stash->sindices        = sindices;
1567d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1568d24d4204SJose E. Roman   stash->rindices        = rindices;
1569d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1570d24d4204SJose E. Roman   stash->nsends          = nsends;
1571d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1572d24d4204SJose E. Roman   stash->reproduce_count = 0;
1573d24d4204SJose E. Roman   PetscFunctionReturn(0);
1574d24d4204SJose E. Roman }
1575d24d4204SJose E. Roman 
1576d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1577d24d4204SJose E. Roman {
1578d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1579d24d4204SJose E. Roman 
1580d24d4204SJose E. Roman   PetscFunctionBegin;
1581*28b400f6SJacob Faibussowitsch   PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
15822c71b3e2SJacob Faibussowitsch   PetscCheckFalse(mb<1 && mb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
15832c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nb<1 && nb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
15845f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
15855f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1586d24d4204SJose E. Roman   PetscFunctionReturn(0);
1587d24d4204SJose E. Roman }
1588d24d4204SJose E. Roman 
1589d24d4204SJose E. Roman /*@
1590d24d4204SJose E. Roman    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1591d24d4204SJose E. Roman    the ScaLAPACK matrix
1592d24d4204SJose E. Roman 
1593d24d4204SJose E. Roman    Logically Collective on A
1594d24d4204SJose E. Roman 
1595d8d19677SJose E. Roman    Input Parameters:
1596d24d4204SJose E. Roman +  A  - a MATSCALAPACK matrix
1597d24d4204SJose E. Roman .  mb - the row block size
1598d24d4204SJose E. Roman -  nb - the column block size
1599d24d4204SJose E. Roman 
1600d24d4204SJose E. Roman    Level: intermediate
1601d24d4204SJose E. Roman 
1602d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1603d24d4204SJose E. Roman @*/
1604d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1605d24d4204SJose E. Roman {
1606d24d4204SJose E. Roman   PetscFunctionBegin;
1607d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1608d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,mb,2);
1609d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,nb,3);
16105f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb)));
1611d24d4204SJose E. Roman   PetscFunctionReturn(0);
1612d24d4204SJose E. Roman }
1613d24d4204SJose E. Roman 
1614d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1615d24d4204SJose E. Roman {
1616d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1617d24d4204SJose E. Roman 
1618d24d4204SJose E. Roman   PetscFunctionBegin;
1619d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1620d24d4204SJose E. Roman   if (nb) *nb = a->nb;
1621d24d4204SJose E. Roman   PetscFunctionReturn(0);
1622d24d4204SJose E. Roman }
1623d24d4204SJose E. Roman 
1624d24d4204SJose E. Roman /*@
1625d24d4204SJose E. Roman    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1626d24d4204SJose E. Roman    the ScaLAPACK matrix
1627d24d4204SJose E. Roman 
1628d24d4204SJose E. Roman    Not collective
1629d24d4204SJose E. Roman 
1630d24d4204SJose E. Roman    Input Parameter:
1631d24d4204SJose E. Roman .  A  - a MATSCALAPACK matrix
1632d24d4204SJose E. Roman 
1633d24d4204SJose E. Roman    Output Parameters:
1634d24d4204SJose E. Roman +  mb - the row block size
1635d24d4204SJose E. Roman -  nb - the column block size
1636d24d4204SJose E. Roman 
1637d24d4204SJose E. Roman    Level: intermediate
1638d24d4204SJose E. Roman 
1639d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1640d24d4204SJose E. Roman @*/
1641d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1642d24d4204SJose E. Roman {
1643d24d4204SJose E. Roman   PetscFunctionBegin;
1644d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
16455f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb)));
1646d24d4204SJose E. Roman   PetscFunctionReturn(0);
1647d24d4204SJose E. Roman }
1648d24d4204SJose E. Roman 
1649d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1650d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1651d24d4204SJose E. Roman 
1652d24d4204SJose E. Roman /*MC
1653d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1654d24d4204SJose E. Roman 
1655d24d4204SJose E. Roman    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1656d24d4204SJose E. Roman 
1657d24d4204SJose E. Roman    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1658d24d4204SJose E. Roman 
1659d24d4204SJose E. Roman    Options Database Keys:
1660d24d4204SJose E. Roman +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1661d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1662d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1663d24d4204SJose E. Roman 
1664d24d4204SJose E. Roman    Level: beginner
1665d24d4204SJose E. Roman 
1666d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL
1667d24d4204SJose E. Roman M*/
1668d24d4204SJose E. Roman 
1669d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1670d24d4204SJose E. Roman {
1671d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1672d24d4204SJose E. Roman   PetscErrorCode     ierr;
1673d24d4204SJose E. Roman   PetscBool          flg,flg1;
1674d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1675d24d4204SJose E. Roman   MPI_Comm           icomm;
1676d24d4204SJose E. Roman   PetscBLASInt       nprow,npcol,myrow,mycol;
1677d24d4204SJose E. Roman   PetscInt           optv1,k=2,array[2]={0,0};
1678d24d4204SJose E. Roman   PetscMPIInt        size;
1679d24d4204SJose E. Roman 
1680d24d4204SJose E. Roman   PetscFunctionBegin;
16815f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
1682d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1683d24d4204SJose E. Roman 
16845f80ce2aSJacob Faibussowitsch   CHKERRQ(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
1685d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1686d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1687d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1688d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1689d24d4204SJose E. Roman 
16905f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscNewLog(A,&a));
1691d24d4204SJose E. Roman   A->data = (void*)a;
1692d24d4204SJose E. Roman 
1693d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1694d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
16955f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
16965f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
16975f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
1698d24d4204SJose E. Roman   }
16995f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
17005f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1701d24d4204SJose E. Roman   if (!flg) {
17025f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscNewLog(A,&grid));
1703d24d4204SJose E. Roman 
17045f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_size(icomm,&size));
1705d24d4204SJose E. Roman     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1706d24d4204SJose E. Roman 
1707d24d4204SJose E. Roman     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
17085f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
1709d24d4204SJose E. Roman     if (flg1) {
17102c71b3e2SJacob Faibussowitsch       PetscCheckFalse(size % optv1,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
1711d24d4204SJose E. Roman       grid->nprow = optv1;
1712d24d4204SJose E. Roman     }
1713d24d4204SJose E. Roman     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1714d24d4204SJose E. Roman 
1715d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1716d24d4204SJose E. Roman     grid->npcol = size/grid->nprow;
17175f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(grid->nprow,&nprow));
17185f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(grid->npcol,&npcol));
1719f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1720d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1721d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1722d24d4204SJose E. Roman     grid->grid_refct = 1;
1723d24d4204SJose E. Roman     grid->nprow      = nprow;
1724d24d4204SJose E. Roman     grid->npcol      = npcol;
1725d24d4204SJose E. Roman     grid->myrow      = myrow;
1726d24d4204SJose E. Roman     grid->mycol      = mycol;
1727d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1728f7ec113fSDamian Marek     grid->ictxrow = Csys2blacs_handle(icomm);
1729d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1730f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1731d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
17325f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));
1733d24d4204SJose E. Roman 
1734d24d4204SJose E. Roman   } else grid->grid_refct++;
17355f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCommDestroy(&icomm));
1736d24d4204SJose E. Roman   a->grid = grid;
1737d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1738d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1739d24d4204SJose E. Roman 
1740d24d4204SJose E. Roman   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
17415f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
1742d24d4204SJose E. Roman   if (flg) {
1743d24d4204SJose E. Roman     a->mb = array[0];
1744d24d4204SJose E. Roman     a->nb = (k>1)? array[1]: a->mb;
1745d24d4204SJose E. Roman   }
1746d24d4204SJose E. Roman   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1747d24d4204SJose E. Roman 
17485f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
17495f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
17505f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
17515f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
1752d24d4204SJose E. Roman   PetscFunctionReturn(0);
1753d24d4204SJose E. Roman }
1754d24d4204SJose E. Roman 
1755d24d4204SJose E. Roman /*@C
1756d24d4204SJose E. Roman    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1757d24d4204SJose E. Roman    (2D block cyclic distribution).
1758d24d4204SJose E. Roman 
1759d24d4204SJose E. Roman    Collective
1760d24d4204SJose E. Roman 
1761d24d4204SJose E. Roman    Input Parameters:
1762d24d4204SJose E. Roman +  comm - MPI communicator
1763d24d4204SJose E. Roman .  mb   - row block size (or PETSC_DECIDE to have it set)
1764d24d4204SJose E. Roman .  nb   - column block size (or PETSC_DECIDE to have it set)
1765d24d4204SJose E. Roman .  M    - number of global rows
1766d24d4204SJose E. Roman .  N    - number of global columns
1767d24d4204SJose E. Roman .  rsrc - coordinate of process that owns the first row of the distributed matrix
1768d24d4204SJose E. Roman -  csrc - coordinate of process that owns the first column of the distributed matrix
1769d24d4204SJose E. Roman 
1770d24d4204SJose E. Roman    Output Parameter:
1771d24d4204SJose E. Roman .  A - the matrix
1772d24d4204SJose E. Roman 
1773d24d4204SJose E. Roman    Options Database Keys:
1774d24d4204SJose E. Roman .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1775d24d4204SJose E. Roman 
1776d24d4204SJose E. Roman    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1777d24d4204SJose E. Roman    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1778d24d4204SJose E. Roman    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1779d24d4204SJose E. Roman 
1780d24d4204SJose E. Roman    Notes:
1781d24d4204SJose E. Roman    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1782d24d4204SJose E. Roman    is chosen.
1783d24d4204SJose E. Roman 
1784d24d4204SJose E. Roman    Storage Information:
1785d24d4204SJose E. Roman    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1786d24d4204SJose E. Roman    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1787d24d4204SJose E. Roman    significance and are thus ignored. The block sizes refer to the values
1788d24d4204SJose E. Roman    used for the distributed matrix, not the same meaning as in BAIJ.
1789d24d4204SJose E. Roman 
1790d24d4204SJose E. Roman    Level: intermediate
1791d24d4204SJose E. Roman 
1792d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1793d24d4204SJose E. Roman @*/
1794d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1795d24d4204SJose E. Roman {
1796d24d4204SJose E. Roman   Mat_ScaLAPACK  *a;
1797d24d4204SJose E. Roman   PetscInt       m,n;
1798d24d4204SJose E. Roman 
1799d24d4204SJose E. Roman   PetscFunctionBegin;
18005f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(comm,A));
18015f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetType(*A,MATSCALAPACK));
18022c71b3e2SJacob Faibussowitsch   PetscCheckFalse(M==PETSC_DECIDE || N==PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1803d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1804d24d4204SJose E. Roman   m = PETSC_DECIDE;
18055f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
1806d24d4204SJose E. Roman   n = PETSC_DECIDE;
18075f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
18085f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(*A,m,n,M,N));
1809d24d4204SJose E. Roman   a = (Mat_ScaLAPACK*)(*A)->data;
18105f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(M,&a->M));
18115f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(N,&a->N));
18125f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
18135f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
18145f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(rsrc,&a->rsrc));
18155f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBLASIntCast(csrc,&a->csrc));
18165f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetUp(*A));
1817d24d4204SJose E. Roman   PetscFunctionReturn(0);
1818d24d4204SJose E. Roman }
1819