xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4 "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5 "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6 "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7 "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8 "       PUBLISHER = {SIAM},\n"
9 "       ADDRESS = {Philadelphia, PA},\n"
10 "       YEAR = 1997\n"
11 "}\n";
/* Set once the ScaLAPACK citation has been registered; presumably passed to
   PetscCitationsRegister() elsewhere in this file -- TODO confirm */
static PetscBool ScaLAPACKCite = PETSC_FALSE;

/* Default ScaLAPACK block size (mb/nb) when the user does not specify one */
#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/* Frees the MPI keyval used to attach the ScaLAPACK grid attribute to communicators;
   registered as a finalization callback (resets Petsc_ScaLAPACK_keyval via MPI) */
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  CHKERRQ(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
  CHKERRMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
31 {
32   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
39   if (iascii) {
40     CHKERRQ(PetscViewerGetFormat(viewer,&format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       CHKERRQ(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
43       CHKERRQ(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
44       CHKERRQ(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
45       CHKERRQ(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
46       PetscFunctionReturn(0);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(0);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   CHKERRQ(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
53   CHKERRQ(MatView(Adense,viewer));
54   CHKERRQ(MatDestroy(&Adense));
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59 {
60   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61   PetscLogDouble isend[2],irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld*a->locc;     /* locally allocated */
67   isend[1] = a->locr*a->locc;    /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated   = isend[0];
70     info->nz_used        = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
73     info->nz_allocated   = irecv[0];
74     info->nz_used        = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     CHKERRMPI(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = ((PetscObject)A)->mem;
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92 {
93   PetscFunctionBegin;
94   switch (op) {
95     case MAT_NEW_NONZERO_LOCATIONS:
96     case MAT_NEW_NONZERO_LOCATION_ERR:
97     case MAT_NEW_NONZERO_ALLOCATION_ERR:
98     case MAT_SYMMETRIC:
99     case MAT_SORTED_FULL:
100     case MAT_HERMITIAN:
101       break;
102     default:
103       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
104   }
105   PetscFunctionReturn(0);
106 }
107 
/*
   MatSetValues_ScaLAPACK - inserts/adds a logically dense block of values.

   Each (row,col) pair is converted to ScaLAPACK's 1-based global indices and
   mapped with infog2l to a local index plus owner coordinates.  Locally owned
   entries are written directly into the local array; off-process entries are
   queued in the matrix stash for communication at assembly time.  Negative
   row/column indices are ignored (standard PETSc convention).
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    CHKERRQ(PetscBLASIntCast(rows[i]+1,&gridx));  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      CHKERRQ(PetscBLASIntCast(cols[j]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* entry lives on this process: write into the local column-major block */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it and mark the matrix unassembled */
        PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        CHKERRQ(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
137 
/*
   MatMultXXXYYY_ScaLAPACK - common kernel for all matrix-vector products:
   computes y = A*x + beta*y (transpose=PETSC_FALSE) or y = A^T*x + beta*y
   (transpose=PETSC_TRUE) via PBLAS gemv.

   The PETSc vectors use a 1d block distribution that does not match the 2d
   block-cyclic layout of A, so each call (1) builds 1d descriptors whose block
   size is the size of process 0's range, (2) allocates 2d copies of x and y
   sized with numroc, (3) redistributes with gemr2d, (4) calls p?gemv, and
   (5) redistributes the result back.  When beta==0 the initial contents of y
   are not needed and its inbound redistribution is skipped.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when beta contributes) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    CHKERRQ(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when beta contributes) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  CHKERRQ(PetscFree2(x2d,y2d));
  PetscFunctionReturn(0);
}
226 
227 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   const PetscScalar *xarray;
230   PetscScalar       *yarray;
231 
232   PetscFunctionBegin;
233   CHKERRQ(VecGetArrayRead(x,&xarray));
234   CHKERRQ(VecGetArray(y,&yarray));
235   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
236   CHKERRQ(VecRestoreArrayRead(x,&xarray));
237   CHKERRQ(VecRestoreArray(y,&yarray));
238   PetscFunctionReturn(0);
239 }
240 
241 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
242 {
243   const PetscScalar *xarray;
244   PetscScalar       *yarray;
245 
246   PetscFunctionBegin;
247   CHKERRQ(VecGetArrayRead(x,&xarray));
248   CHKERRQ(VecGetArray(y,&yarray));
249   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
250   CHKERRQ(VecRestoreArrayRead(x,&xarray));
251   CHKERRQ(VecRestoreArray(y,&yarray));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
256 {
257   const PetscScalar *xarray;
258   PetscScalar       *zarray;
259 
260   PetscFunctionBegin;
261   if (y != z) CHKERRQ(VecCopy(y,z));
262   CHKERRQ(VecGetArrayRead(x,&xarray));
263   CHKERRQ(VecGetArray(z,&zarray));
264   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
265   CHKERRQ(VecRestoreArrayRead(x,&xarray));
266   CHKERRQ(VecRestoreArray(z,&zarray));
267   PetscFunctionReturn(0);
268 }
269 
270 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
271 {
272   const PetscScalar *xarray;
273   PetscScalar       *zarray;
274 
275   PetscFunctionBegin;
276   if (y != z) CHKERRQ(VecCopy(y,z));
277   CHKERRQ(VecGetArrayRead(x,&xarray));
278   CHKERRQ(VecGetArray(z,&zarray));
279   CHKERRQ(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
280   CHKERRQ(VecRestoreArrayRead(x,&xarray));
281   CHKERRQ(VecRestoreArray(z,&zarray));
282   PetscFunctionReturn(0);
283 }
284 
/* MatMatMultNumeric_ScaLAPACK - C = A*B via a single p?gemm call;
   all three matrices are assumed to already have compatible layouts */
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
298 
/* MatMatMultSymbolic_ScaLAPACK - sizes and sets up C for the product A*B
   and installs the numeric phase; fill is ignored (dense format) */
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  CHKERRQ(MatSetType(C,MATSCALAPACK));
  CHKERRQ(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
308 
/* MatMatTransposeMultNumeric_ScaLAPACK - C = A*B^T via p?gemm with the
   second operand transposed ("T", not conjugated) */
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
322 
/* MatMatTransposeMultSymbolic_ScaLAPACK - sizes and sets up C for A*B^T;
   fill is ignored (dense format).  NOTE(review): unlike the AB symbolic
   routine, the numeric function pointer is not set here -- presumably the
   MatProductSymbolic_ABt machinery installs it; confirm against matproduct.c */
static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  CHKERRQ(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
  CHKERRQ(MatSetType(C,MATSCALAPACK));
  CHKERRQ(MatSetUp(C));
  PetscFunctionReturn(0);
}
331 
332 /* --------------------------------------- */
/* Installs the symbolic routines for the MATPRODUCT_AB case */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
340 
/* Installs the symbolic routines for the MATPRODUCT_ABt case */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
348 
349 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
350 {
351   Mat_Product    *product = C->product;
352 
353   PetscFunctionBegin;
354   switch (product->type) {
355     case MATPRODUCT_AB:
356       CHKERRQ(MatProductSetFromOptions_ScaLAPACK_AB(C));
357       break;
358     case MATPRODUCT_ABt:
359       CHKERRQ(MatProductSetFromOptions_ScaLAPACK_ABt(C));
360       break;
361     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
362   }
363   PetscFunctionReturn(0);
364 }
365 /* --------------------------------------- */
366 
/*
   MatGetDiagonal_ScaLAPACK - extracts the diagonal of A into PETSc vector D.

   The diagonal is gathered element-by-element (elget broadcasts each value
   along a process row or column) into a temporary 1-column (or 1-row) 2d
   distributed array, which is then redistributed with gemr2d into D's 1d
   block layout.  The row-oriented path is used when M<=N, the column-oriented
   path otherwise, so the loop length is always min(M,N).
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  CHKERRQ(VecGetArray(D,&darray));

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    CHKERRQ(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: elget fetches A(j,j), elset stores it at row j of the 2d column */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    CHKERRQ(PetscFree(d2d));

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    CHKERRQ(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: elget fetches A(j,j), elset stores it at column j of the 2d row */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    CHKERRQ(PetscFree(d2d));
  }

  CHKERRQ(VecRestoreArray(D,&darray));
  CHKERRQ(VecAssemblyBegin(D));
  CHKERRQ(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
438 
/*
   MatDiagonalScale_ScaLAPACK - computes A = diag(L)*A*diag(R); either vector
   may be NULL to skip that side.

   For each side, the PETSc 1d-distributed vector is redistributed (gemr2d)
   into a single row (R) or column (L) of the 2d grid, broadcast along the
   other grid dimension with BLACS gebs2d/gebr2d so every process holds the
   factors for its local columns/rows, and then applied entrywise to the
   local block.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    CHKERRQ(VecGetArrayRead(R,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    CHKERRQ(PetscLayoutGetRanges(A->cmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    CHKERRQ(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, the rest receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: column j of the local block is scaled by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    CHKERRQ(PetscFree(d2d));
    CHKERRQ(VecRestoreArrayRead(R,(const PetscScalar **)&d));
  }
  if (L) {
    CHKERRQ(VecGetArrayRead(L,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
    CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    CHKERRQ(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, the rest receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local block is scaled by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    CHKERRQ(PetscFree(d2d));
    CHKERRQ(VecRestoreArrayRead(L,(const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
511 
/* MatMissingDiagonal_ScaLAPACK - a dense format always stores every diagonal
   entry, so the answer is unconditionally PETSC_FALSE; *d is left untouched
   (only meaningful when *missing is PETSC_TRUE) */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
518 
519 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
520 {
521   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
522   PetscBLASInt  n,one=1;
523 
524   PetscFunctionBegin;
525   n = x->lld*x->locc;
526   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
527   PetscFunctionReturn(0);
528 }
529 
/* MatShift_ScaLAPACK - X += alpha*I: each diagonal entry (1-based ScaLAPACK
   indexing, up to min(M,N)) is fetched with elget, incremented, and written
   back with elset */
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}
545 
/* MatAXPY_ScaLAPACK - Y = alpha*X + Y via ScaLAPACK's distributed matrix add;
   requires X and Y to have identical block-cyclic distributions (checked),
   str is ignored since both matrices are dense */
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;    /* keep existing Y contents */

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  CHKERRQ(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}
559 
/* MatCopy_ScaLAPACK - raw copy of A's local block into B; assumes B has the
   same distribution (same lld/locc) as A -- TODO confirm callers guarantee
   this; str is ignored since both matrices are dense */
static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

  PetscFunctionBegin;
  CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  CHKERRQ(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}
570 
/*
   MatDuplicate_ScaLAPACK - creates a new ScaLAPACK matrix with the same
   sizes, block sizes, and source-process coordinates as A; the distribution
   parameters are copied into B's data before MatSetUp so both matrices end
   up with identical layouts.  Values are copied only for MAT_COPY_VALUES.
*/
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(MatCreate(comm,&Bs));
  CHKERRQ(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  CHKERRQ(MatSetType(Bs,MATSCALAPACK));
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's block-cyclic layout before setup */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  CHKERRQ(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    CHKERRQ(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
597 
/*
   MatTranspose_ScaLAPACK - B = A^T; only MAT_INITIAL_MATRIX is supported.
   B is created with row/column roles of A swapped (nb,mb / N,M / csrc,rsrc).
   In complex builds the PBLAS transpose routine used here conjugates, so the
   result is conjugated back entrywise to yield a plain (non-Hermitian)
   transpose.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
622 
623 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
624 {
625   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
626   PetscInt      i,j;
627 
628   PetscFunctionBegin;
629   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
630   PetscFunctionReturn(0);
631 }
632 
/*
   MatHermitianTranspose_ScaLAPACK - B = A^H; only MAT_INITIAL_MATRIX is
   supported.  Uses the same PBLAS transpose call as MatTranspose_ScaLAPACK
   but without the conjugation fix-up, so in complex builds the result keeps
   the conjugation (i.e. the Hermitian transpose).
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
650 
/*
   MatSolve_ScaLAPACK - solves A*X = B for a previously factored A.

   B is first copied into X; the solve then works in place on X's array.
   The right-hand side is redistributed from PETSc's 1d layout into a
   1-column 2d block-cyclic array matching A's grid, the appropriate
   triangular-solve routine is called (getrs for LU, potrs for Cholesky),
   and the solution is redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  CHKERRQ(VecCopy(B,X));
  CHKERRQ(VecGetArray(X,&x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
  CHKERRQ(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  CHKERRQ(PetscMalloc1(lszx,&x2d));
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  CHKERRQ(PetscFree(x2d));
  CHKERRQ(VecRestoreArray(X,&x));
  PetscFunctionReturn(0);
}
702 
703 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
704 {
705   PetscFunctionBegin;
706   CHKERRQ(MatSolve_ScaLAPACK(A,B,X));
707   CHKERRQ(VecAXPY(X,1,Y));
708   PetscFunctionReturn(0);
709 }
710 
711 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
712 {
713   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
714   PetscBool      flg1,flg2;
715   PetscBLASInt   one=1,info;
716 
717   PetscFunctionBegin;
718   CHKERRQ(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
719   CHKERRQ(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
720   PetscCheckFalse(!(flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
721   MatScaLAPACKCheckDistribution(B,1,X,2);
722   b = (Mat_ScaLAPACK*)B->data;
723   x = (Mat_ScaLAPACK*)X->data;
724   CHKERRQ(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));
725 
726   switch (A->factortype) {
727     case MAT_FACTOR_LU:
728       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
729       PetscCheckScaLapackInfo("getrs",info);
730       break;
731     case MAT_FACTOR_CHOLESKY:
732       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
733       PetscCheckScaLapackInfo("potrs",info);
734       break;
735     default:
736       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
742 {
743   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
744   PetscBLASInt   one=1,info;
745 
746   PetscFunctionBegin;
747   if (!a->pivots) {
748     CHKERRQ(PetscMalloc1(a->locr+a->mb,&a->pivots));
749     CHKERRQ(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
750   }
751   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
752   PetscCheckScaLapackInfo("getrf",info);
753   A->factortype = MAT_FACTOR_LU;
754   A->assembled  = PETSC_TRUE;
755 
756   CHKERRQ(PetscFree(A->solvertype));
757   CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
758   PetscFunctionReturn(0);
759 }
760 
761 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
762 {
763   PetscFunctionBegin;
764   CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN));
765   CHKERRQ(MatLUFactor_ScaLAPACK(F,0,0,info));
766   PetscFunctionReturn(0);
767 }
768 
/* Symbolic LU factorization: a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
775 
/*
   MatCholeskyFactor for MATSCALAPACK: in-place Cholesky factorization of
   the lower triangle via PxPOTRF.  The permutation perm is ignored.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  /* "L": factor the lower triangular part */
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  CHKERRQ(PetscFree(A->solvertype));
  CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}
791 
792 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
793 {
794   PetscFunctionBegin;
795   CHKERRQ(MatCopy(A,F,SAME_NONZERO_PATTERN));
796   CHKERRQ(MatCholeskyFactor_ScaLAPACK(F,0,info));
797   PetscFunctionReturn(0);
798 }
799 
/* Symbolic Cholesky factorization: a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
806 
/* Reports MATSOLVERSCALAPACK as the solver type of a factored MATSCALAPACK matrix */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
813 
/*
   Creates the factor matrix F for a MATSCALAPACK matrix A, for factor type
   ftype (LU or Cholesky).  F is fully allocated here with the same block
   sizes, global dimensions, and source process coordinates as A, so the
   symbolic factorization routines can be no-ops.
*/
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  CHKERRQ(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype = ftype;
  CHKERRQ(PetscFree(B->solvertype));
  CHKERRQ(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));

  CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}
831 
/* Registers the ScaLAPACK solver package for LU and Cholesky factorizations of MATSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
  CHKERRQ(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
839 
/*
   MatNorm for MATSCALAPACK: computes the 1-, Frobenius-, or infinity-norm
   via PxLANGE.  The 1- and infinity-norms need a real workspace (allocated
   below sized to cover either case); the Frobenius norm needs none, so a
   dummy pointer is passed.
*/
static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,lwork=0;
  const char     *ntype;
  PetscScalar    *work=NULL,dummy;

  PetscFunctionBegin;
  switch (type) {
    case NORM_1:
      ntype = "1";
      lwork = PetscMax(a->locr,a->locc);
      break;
    case NORM_FROBENIUS:
      ntype = "F";
      work  = &dummy;   /* not referenced by PxLANGE for the Frobenius norm */
      break;
    case NORM_INFINITY:
      ntype = "I";
      lwork = PetscMax(a->locr,a->locc);
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
  }
  if (lwork) CHKERRQ(PetscMalloc1(lwork,&work));
  *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
  if (lwork) CHKERRQ(PetscFree(work));
  PetscFunctionReturn(0);
}
869 
/* Sets all entries of the local block (lld x locc array) to zero */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  CHKERRQ(PetscArrayzero(a->loc,a->lld*a->locc));
  PetscFunctionReturn(0);
}
878 
879 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
880 {
881   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
882   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
883 
884   PetscFunctionBegin;
885   if (rows) {
886     n     = a->locr;
887     nb    = a->mb;
888     isrc  = a->rsrc;
889     nproc = a->grid->nprow;
890     iproc = a->grid->myrow;
891     CHKERRQ(PetscMalloc1(n,&idx));
892     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
893     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
894   }
895   if (cols) {
896     n     = a->locc;
897     nb    = a->nb;
898     isrc  = a->csrc;
899     nproc = a->grid->npcol;
900     iproc = a->grid->mycol;
901     CHKERRQ(PetscMalloc1(n,&idx));
902     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
903     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
904   }
905   PetscFunctionReturn(0);
906 }
907 
/*
   Converts a MATSCALAPACK matrix (2d block-cyclic) to MATDENSE (1d block
   distribution by rows) using PxGEMR2D for the redistribution.  In the
   MAT_REUSE_MATRIX case with differing local row sizes, an auxiliary dense
   matrix with PETSc's default equal splitting is used as an intermediate,
   and its rows are copied into *B with MatSetValues.
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    CHKERRMPI(MPI_Comm_size(comm,&size));
    CHKERRQ(PetscLayoutGetRanges((*B)->rmap,&branges));
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    CHKERRQ(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
    CHKERRQ(MatSetSizes(Bmpi,m,n,M,N));
    CHKERRQ(MatSetType(Bmpi,MATDENSE));
    CHKERRQ(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    CHKERRQ(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    CHKERRQ(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    CHKERRQ(MatDenseRestoreArray(Bmpi,&barray));
    CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    CHKERRQ(MatGetOwnershipRange(Bmpi,&rstart,&rend));
    for (i=rstart;i<rend;i++) {
      CHKERRQ(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
      CHKERRQ(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
      CHKERRQ(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
    }
    CHKERRQ(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatDestroy(&Bmpi));

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      CHKERRQ(MatCreate(comm,&Bmpi));
      m = PETSC_DECIDE;
      CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
      n = PETSC_DECIDE;
      CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
      CHKERRQ(MatSetSizes(Bmpi,m,n,M,N));
      CHKERRQ(MatSetType(Bmpi,MATDENSE));
      CHKERRQ(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    CHKERRQ(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    CHKERRQ(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    CHKERRQ(MatDenseRestoreArray(Bmpi,&barray));

    CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      CHKERRQ(MatHeaderReplace(A,&Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
998 
/*
   Converts a MATDENSE matrix (1d block distribution by rows) to MATSCALAPACK
   (2d block-cyclic) using PxGEMR2D for the redistribution.  A ScaLAPACK
   descriptor describing A's 1d layout is built on the column context of the
   target grid so that both layouts are visible to the same context.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    CHKERRQ(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
    CHKERRQ(MatSetSizes(Bmpi,m,n,M,N));
    CHKERRQ(MatSetType(Bmpi,MATSCALAPACK));
    CHKERRQ(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  CHKERRQ(PetscLayoutGetRanges(A->rmap,&ranges));
  CHKERRQ(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
  CHKERRQ(MatDenseGetLDA(A,&lda));
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  CHKERRQ(MatDenseGetArray(A,&aarray));
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  CHKERRQ(MatDenseRestoreArray(A,&aarray));

  CHKERRQ(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    CHKERRQ(MatHeaderReplace(A,&Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1046 
1047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1048 {
1049   Mat               mat_scal;
1050   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1051   const PetscInt    *cols;
1052   const PetscScalar *vals;
1053 
1054   PetscFunctionBegin;
1055   if (reuse == MAT_REUSE_MATRIX) {
1056     mat_scal = *newmat;
1057     CHKERRQ(MatZeroEntries(mat_scal));
1058   } else {
1059     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1060     m = PETSC_DECIDE;
1061     CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1062     n = PETSC_DECIDE;
1063     CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
1064     CHKERRQ(MatSetSizes(mat_scal,m,n,M,N));
1065     CHKERRQ(MatSetType(mat_scal,MATSCALAPACK));
1066     CHKERRQ(MatSetUp(mat_scal));
1067   }
1068   for (row=rstart;row<rend;row++) {
1069     CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals));
1070     CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
1071     CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals));
1072   }
1073   CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
1074   CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1075 
1076   if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal));
1077   else *newmat = mat_scal;
1078   PetscFunctionReturn(0);
1079 }
1080 
/*
   Converts an SBAIJ (symmetric, upper triangle stored) matrix to a full
   MATSCALAPACK matrix.  Each stored row contributes its upper-triangular
   entries directly, and the mirrored lower-triangular entries (conjugated
   when A is Hermitian) are added explicitly; ADD_VALUES is used so the
   diagonal is not inserted twice.
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    CHKERRQ(MatZeroEntries(mat_scal));
  } else {
    CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    CHKERRQ(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    CHKERRQ(MatSetSizes(mat_scal,m,n,M,N));
    CHKERRQ(MatSetType(mat_scal,MATSCALAPACK));
    CHKERRQ(MatSetUp(mat_scal));
  }
  CHKERRQ(MatGetRowUpperTriangular(A));
  for (row=rstart;row<rend;row++) {
    CHKERRQ(MatGetRow(A,row,&ncols,&cols,&vals));
    CHKERRQ(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;  /* do not duplicate the diagonal */
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      CHKERRQ(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
    }
    CHKERRQ(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  CHKERRQ(MatRestoreRowUpperTriangular(A));
  CHKERRQ(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) CHKERRQ(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1122 
/*
   Allocates (zero-initialized) the local lld x locc storage block of a
   MATSCALAPACK matrix; lld defaults to the number of local rows.
   PetscIntMultError guards the size product against integer overflow.
*/
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       sz=0;

  PetscFunctionBegin;
  CHKERRQ(PetscLayoutSetUp(A->rmap));
  CHKERRQ(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  CHKERRQ(PetscFree(a->loc));
  CHKERRQ(PetscIntMultError(a->lld,a->locc,&sz));
  CHKERRQ(PetscCalloc1(sz,&a->loc));
  CHKERRQ(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1141 
/*
   Destroys a MATSCALAPACK matrix: frees the local data and pivots, and
   decrements the reference count of the BLACS process grid attached to the
   communicator (the grid and its contexts are released when the last
   matrix using it goes away).
*/
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  CHKERRQ(MatStashDestroy_Private(&A->stash));
  CHKERRQ(PetscFree(a->loc));
  CHKERRQ(PetscFree(a->pivots));
  /* look up the shared grid stored as an attribute on the inner communicator */
  CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (--grid->grid_refct == 0) {
    /* last user: release all three BLACS contexts and the attribute itself */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    CHKERRQ(PetscFree(grid));
    CHKERRMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
  }
  CHKERRQ(PetscCommDestroy(&icomm));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
  CHKERRQ(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1170 
/*
   Verifies that a layout has equal local sizes on all processes, except
   that trailing processes may hold fewer (or zero) entries — the layout
   produced by MatCreateScaLAPACK.  With 1 or 2 processes any split is
   acceptable, so the check is skipped.
*/
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_size(map->comm,&size));
  if (size>2) {
    CHKERRQ(PetscLayoutGetRanges(map,&ranges));
    n = ranges[1]-ranges[0];
    /* find the first process whose local size differs from the first one */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    PetscCheckFalse(i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1187 
/*
   MatSetUp for MATSCALAPACK: validates the layouts, computes the local
   block-cyclic dimensions with NUMROC, allocates the local storage, and
   initializes the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  CHKERRQ(PetscLayoutSetUp(A->rmap));
  CHKERRQ(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  CHKERRQ(MatScaLAPACKCheckLayout(A->rmap));
  CHKERRQ(MatScaLAPACKCheckLayout(A->cmap));

  /* compute local sizes */
  CHKERRQ(PetscBLASIntCast(A->rmap->N,&a->M));
  CHKERRQ(PetscBLASIntCast(A->cmap->N,&a->N));
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  CHKERRQ(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1216 
1217 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1218 {
1219   PetscInt       nstash,reallocs;
1220 
1221   PetscFunctionBegin;
1222   if (A->nooffprocentries) PetscFunctionReturn(0);
1223   CHKERRQ(MatStashScatterBegin_Private(A,&A->stash,NULL));
1224   CHKERRQ(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
1225   CHKERRQ(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
1226   PetscFunctionReturn(0);
1227 }
1228 
/*
   End assembly: drains the stash messages started in MatAssemblyBegin and
   stores each received (row,col,value) triple into the local block-cyclic
   array.  PxINFOG2L converts the 1-based global indices to 1-based local
   indices and the owning process coordinates, which must match this rank.
*/
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    CHKERRQ(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
    if (!flg) break;  /* no more incoming messages */
    for (i=0;i<n;i++) {
      /* ScaLAPACK indices are 1-based */
      CHKERRQ(PetscBLASIntCast(row[i]+1,&gridx));
      CHKERRQ(PetscBLASIntCast(col[i]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      PetscCheckFalse(rsrc!=a->grid->myrow || csrc!=a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  CHKERRQ(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1257 
1258 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1259 {
1260   Mat            Adense,As;
1261   MPI_Comm       comm;
1262 
1263   PetscFunctionBegin;
1264   CHKERRQ(PetscObjectGetComm((PetscObject)newMat,&comm));
1265   CHKERRQ(MatCreate(comm,&Adense));
1266   CHKERRQ(MatSetType(Adense,MATDENSE));
1267   CHKERRQ(MatLoad(Adense,viewer));
1268   CHKERRQ(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
1269   CHKERRQ(MatDestroy(&Adense));
1270   CHKERRQ(MatHeaderReplace(newMat,&As));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /* -------------------------------------------------------------------*/
/* Dispatch table of Mat operations implemented by MATSCALAPACK;
   zero entries are operations this type does not support */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1425 
/*
   Custom stash scatter for MATSCALAPACK: routes each stashed (row,col,value)
   entry to the process that owns it in the 2d block-cyclic layout (computed
   with PxINFOG2L and Cblacs_pnum), instead of the 1d row-ownership routing of
   the generic stash.  Two messages are sent per destination: one with the
   (row,col) index pairs and one with the values.  The posted receives are
   interleaved in stash->recv_waits as expected by
   MatStashScatterGetMesg_Private().
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    CHKERRMPI(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
    PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  CHKERRQ(PetscCalloc1(size,&nlengths));
  CHKERRQ(PetscMalloc1(stash->n+1,&owner));

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* map the 1-based global (row,col) to the owning process in the grid */
      CHKERRQ(PetscBLASIntCast(space->idx[l]+1,&gridx));
      CHKERRQ(PetscBLASIntCast(space->idy[l]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);  /* grid coords -> linear rank */
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  CHKERRQ(PetscCalloc1(size,&sizes));
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   CHKERRQ(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
   CHKERRQ(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   CHKERRQ(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   CHKERRQ(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
   CHKERRQ(PetscFree(onodes));
   CHKERRQ(PetscFree(olengths));}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  CHKERRQ(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
  CHKERRQ(PetscMalloc1(2*nsends,&send_waits));
  CHKERRQ(PetscMalloc2(size,&startv,size,&starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack the stashed entries into the per-destination send buffers */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices in the first half of the segment, column indices in the second */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
    }
    space = space_next;
  }
  /* startv was advanced while packing; rebuild the segment offsets for the sends */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      CHKERRMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      CHKERRMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  CHKERRQ(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      CHKERRQ(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  CHKERRQ(PetscFree(nlengths));
  CHKERRQ(PetscFree(owner));
  CHKERRQ(PetscFree2(startv,starti));
  CHKERRQ(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  CHKERRQ(PetscMalloc1(2*nreceives,&recv_waits));

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  CHKERRQ(PetscFree(recv_waits1));
  CHKERRQ(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1575 
/*
   Back-end for MatScaLAPACKSetBlockSizes(): validates and stores the block
   sizes.  PETSC_DECIDE maps mb to DEFAULT_BLOCKSIZE and nb to mb.  Must be
   called before MatSetUp(), since the sizes drive the local allocation.
*/
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
  PetscCheckFalse(mb<1 && mb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
  PetscCheckFalse(nb<1 && nb!=PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
  CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
  CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
  PetscFunctionReturn(0);
}
1588 
1589 /*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1591    the ScaLAPACK matrix
1592 
1593    Logically Collective on A
1594 
1595    Input Parameters:
1596 +  A  - a MATSCALAPACK matrix
1597 .  mb - the row block size
1598 -  nb - the column block size
1599 
1600    Level: intermediate
1601 
1602 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1603 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* block sizes must agree on every rank: the block-cyclic layout is collective */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* PetscTryMethod: dispatches to the implementation composed on A; a no-op for non-MATSCALAPACK matrices */
  CHKERRQ(PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb)));
  PetscFunctionReturn(0);
}
1613 
1614 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1615 {
1616   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1617 
1618   PetscFunctionBegin;
1619   if (mb) *mb = a->mb;
1620   if (nb) *nb = a->nb;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 /*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1626    the ScaLAPACK matrix
1627 
1628    Not collective
1629 
1630    Input Parameter:
1631 .  A  - a MATSCALAPACK matrix
1632 
1633    Output Parameters:
1634 +  mb - the row block size
1635 -  nb - the column block size
1636 
1637    Level: intermediate
1638 
1639 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1640 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod: dispatches to the implementation composed on A; errors if A is not MATSCALAPACK */
  CHKERRQ(PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb)));
  PetscFunctionReturn(0);
}
1648 
1649 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1650 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1651 
1652 /*MC
1653    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1654 
1655    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1656 
1657    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1658 
1659    Options Database Keys:
1660 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1661 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1662 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1663 
1664    Level: beginner
1665 
1666 .seealso: MATDENSE, MATELEMENTAL
1667 M*/
1668 
/*
   MatCreate_ScaLAPACK - MATSCALAPACK constructor: installs the function table and
   stash hooks, attaches (or creates) the BLACS process grid shared by all
   MATSCALAPACK matrices on the same communicator, and sets default block sizes.

   The grid is shared via MPI attribute caching under Petsc_ScaLAPACK_keyval and
   reference-counted through grid->grid_refct; the keyval itself is freed at
   PetscFinalize() via PetscRegisterFinalize().
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  CHKERRQ(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  /* the stash collects off-process MatSetValues(); only ScatterBegin is type-specific here */
  CHKERRQ(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  CHKERRQ(PetscNewLog(A,&a));
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* first MATSCALAPACK matrix ever created: make the keyval and schedule its cleanup */
    CHKERRMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
    CHKERRQ(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    CHKERRQ(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
  }
  CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  CHKERRMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (!flg) {
    /* no cached grid on this communicator yet: build and cache one */
    CHKERRQ(PetscNewLog(A,&grid));

    CHKERRMPI(MPI_Comm_size(icomm,&size));
    /* start from a near-square grid; +0.001 guards against sqrt() truncating an exact square */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    CHKERRQ(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
    if (flg1) {
      PetscCheckFalse(size % optv1,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    CHKERRQ(PetscBLASIntCast(grid->nprow,&nprow));
    CHKERRQ(PetscBLASIntCast(grid->npcol,&npcol));
    /* map the communicator to a BLACS context and initialize the 2D grid (row-major process ordering) */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    /* cache on the communicator so later MATSCALAPACK matrices reuse this grid */
    CHKERRMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));

  } else grid->grid_refct++;
  CHKERRQ(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  /* optional override of the default block sizes; one value sets both mb and nb */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  CHKERRQ(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
  if (flg) {
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
  CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
  PetscFunctionReturn(0);
}
1754 
1755 /*@C
1756    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1757    (2D block cyclic distribution).
1758 
1759    Collective
1760 
1761    Input Parameters:
1762 +  comm - MPI communicator
1763 .  mb   - row block size (or PETSC_DECIDE to have it set)
1764 .  nb   - column block size (or PETSC_DECIDE to have it set)
1765 .  M    - number of global rows
1766 .  N    - number of global columns
1767 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1768 -  csrc - coordinate of process that owns the first column of the distributed matrix
1769 
1770    Output Parameter:
1771 .  A - the matrix
1772 
1773    Options Database Keys:
1774 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1775 
1776    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1777    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1778    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1779 
1780    Notes:
1781    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1782    is chosen.
1783 
1784    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1786    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1787    significance and are thus ignored. The block sizes refer to the values
1788    used for the distributed matrix, not the same meaning as in BAIJ.
1789 
1790    Level: intermediate
1791 
1792 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1793 @*/
1794 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1795 {
1796   Mat_ScaLAPACK  *a;
1797   PetscInt       m,n;
1798 
1799   PetscFunctionBegin;
1800   CHKERRQ(MatCreate(comm,A));
1801   CHKERRQ(MatSetType(*A,MATSCALAPACK));
1802   PetscCheckFalse(M==PETSC_DECIDE || N==PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1803   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1804   m = PETSC_DECIDE;
1805   CHKERRQ(PetscSplitOwnershipEqual(comm,&m,&M));
1806   n = PETSC_DECIDE;
1807   CHKERRQ(PetscSplitOwnershipEqual(comm,&n,&N));
1808   CHKERRQ(MatSetSizes(*A,m,n,M,N));
1809   a = (Mat_ScaLAPACK*)(*A)->data;
1810   CHKERRQ(PetscBLASIntCast(M,&a->M));
1811   CHKERRQ(PetscBLASIntCast(N,&a->N));
1812   CHKERRQ(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1813   CHKERRQ(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1814   CHKERRQ(PetscBLASIntCast(rsrc,&a->rsrc));
1815   CHKERRQ(PetscBLASIntCast(csrc,&a->csrc));
1816   CHKERRQ(MatSetUp(*A));
1817   PetscFunctionReturn(0);
1818 }
1819