xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 17359960c2bb273c52a03452053b2d3aab5ca7a5)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12 {
13   PetscErrorCode    ierr;
14   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
15   PetscBool         iascii;
16   PetscViewerFormat format;
17   Mat               Adense;
18 
19   PetscFunctionBegin;
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21   if (iascii) {
22     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28       PetscFunctionReturn(0);
29     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30       PetscFunctionReturn(0);
31     }
32   }
33   /* convert to dense format and call MatView() */
34   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37   PetscFunctionReturn(0);
38 }
39 
40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41 {
42   PetscErrorCode ierr;
43   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
44   PetscLogDouble isend[2],irecv[2];
45 
46   PetscFunctionBegin;
47   info->block_size = 1.0;
48 
49   isend[0] = a->lld*a->locc;     /* locally allocated */
50   isend[1] = a->locr*a->locc;    /* used submatrix */
51   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52     info->nz_allocated   = isend[0];
53     info->nz_used        = isend[1];
54   } else if (flag == MAT_GLOBAL_MAX) {
55     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56     info->nz_allocated   = irecv[0];
57     info->nz_used        = irecv[1];
58   } else if (flag == MAT_GLOBAL_SUM) {
59     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60     info->nz_allocated   = irecv[0];
61     info->nz_used        = irecv[1];
62   }
63 
64   info->nz_unneeded       = 0;
65   info->assemblies        = A->num_ass;
66   info->mallocs           = 0;
67   info->memory            = ((PetscObject)A)->mem;
68   info->fill_ratio_given  = 0;
69   info->fill_ratio_needed = 0;
70   info->factor_mallocs    = 0;
71   PetscFunctionReturn(0);
72 }
73 
74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75 {
76   PetscFunctionBegin;
77   switch (op) {
78     case MAT_NEW_NONZERO_LOCATIONS:
79     case MAT_NEW_NONZERO_LOCATION_ERR:
80     case MAT_NEW_NONZERO_ALLOCATION_ERR:
81     case MAT_SYMMETRIC:
82     case MAT_SORTED_FULL:
83     case MAT_HERMITIAN:
84       break;
85     default:
86       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
92 {
93   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j;
96   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
97 
98   PetscFunctionBegin;
99   for (i=0;i<nr;i++) {
100     if (rows[i] < 0) continue;
101     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
102     for (j=0;j<nc;j++) {
103       if (cols[j] < 0) continue;
104       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
105       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
106       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
107         switch (imode) {
108           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
109           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
110           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
111         }
112       } else {
113         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
114         A->assembled = PETSC_FALSE;
115         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
116       }
117     }
118   }
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
123 {
124   PetscErrorCode ierr;
125   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
126   PetscScalar    *x2d,*y2d,alpha=1.0;
127   const PetscInt *ranges;
128   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
129 
130   PetscFunctionBegin;
131   if (transpose) {
132 
133     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
134     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
135     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
136     xlld = PetscMax(1,A->rmap->n);
137     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
138     PetscCheckScaLapackInfo("descinit",info);
139     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
140     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
141     ylld = 1;
142     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
143     PetscCheckScaLapackInfo("descinit",info);
144 
145     /* allocate 2d vectors */
146     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
147     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
148     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
149     xlld = PetscMax(1,lszx);
150 
151     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
152     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
153     PetscCheckScaLapackInfo("descinit",info);
154     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
155     PetscCheckScaLapackInfo("descinit",info);
156 
157     /* redistribute x as a column of a 2d matrix */
158     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
159 
160     /* redistribute y as a row of a 2d matrix */
161     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
162 
163     /* call PBLAS subroutine */
164     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
165 
166     /* redistribute y from a row of a 2d matrix */
167     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
168 
169   } else {   /* non-transpose */
170 
171     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
172     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
173     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
174     xlld = 1;
175     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
176     PetscCheckScaLapackInfo("descinit",info);
177     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
178     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
179     ylld = PetscMax(1,A->rmap->n);
180     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
181     PetscCheckScaLapackInfo("descinit",info);
182 
183     /* allocate 2d vectors */
184     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
185     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
186     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
187     ylld = PetscMax(1,lszy);
188 
189     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
190     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
191     PetscCheckScaLapackInfo("descinit",info);
192     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
193     PetscCheckScaLapackInfo("descinit",info);
194 
195     /* redistribute x as a row of a 2d matrix */
196     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
197 
198     /* redistribute y as a column of a 2d matrix */
199     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
200 
201     /* call PBLAS subroutine */
202     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
203 
204     /* redistribute y from a column of a 2d matrix */
205     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
206 
207   }
208   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213 {
214   PetscErrorCode    ierr;
215   const PetscScalar *xarray;
216   PetscScalar       *yarray;
217 
218   PetscFunctionBegin;
219   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   PetscErrorCode    ierr;
230   const PetscScalar *xarray;
231   PetscScalar       *yarray;
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243 {
244   PetscErrorCode    ierr;
245   const PetscScalar *xarray;
246   PetscScalar       *zarray;
247 
248   PetscFunctionBegin;
249   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259 {
260   PetscErrorCode    ierr;
261   const PetscScalar *xarray;
262   PetscScalar       *zarray;
263 
264   PetscFunctionBegin;
265   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275 {
276   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279   PetscScalar   sone=1.0,zero=0.0;
280   PetscBLASInt  one=1;
281 
282   PetscFunctionBegin;
283   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284   C->assembled = PETSC_TRUE;
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289 {
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
294   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
295   ierr = MatSetUp(C);CHKERRQ(ierr);
296   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297   PetscFunctionReturn(0);
298 }
299 
300 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301 {
302   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305   PetscScalar   sone=1.0,zero=0.0;
306   PetscBLASInt  one=1;
307 
308   PetscFunctionBegin;
309   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310   C->assembled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315 {
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321   ierr = MatSetUp(C);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327 {
328   PetscFunctionBegin;
329   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330   C->ops->productsymbolic = MatProductSymbolic_AB;
331   PetscFunctionReturn(0);
332 }
333 
334 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335 {
336   PetscFunctionBegin;
337   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338   C->ops->productsymbolic          = MatProductSymbolic_ABt;
339   PetscFunctionReturn(0);
340 }
341 
342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343 {
344   PetscErrorCode ierr;
345   Mat_Product    *product = C->product;
346 
347   PetscFunctionBegin;
348   switch (product->type) {
349     case MATPRODUCT_AB:
350       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
351       break;
352     case MATPRODUCT_ABt:
353       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
354       break;
355     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356   }
357   PetscFunctionReturn(0);
358 }
359 /* --------------------------------------- */
360 
361 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
362 {
363   PetscErrorCode    ierr;
364   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
365   PetscScalar       *darray,*d2d,v;
366   const PetscInt    *ranges;
367   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
368 
369   PetscFunctionBegin;
370   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
371 
372   if (A->rmap->N<=A->cmap->N) {   /* row version */
373 
374     /* create ScaLAPACK descriptor for vector (1d block distribution) */
375     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
376     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
377     dlld = PetscMax(1,A->rmap->n);
378     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
379     PetscCheckScaLapackInfo("descinit",info);
380 
381     /* allocate 2d vector */
382     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
384     dlld = PetscMax(1,lszd);
385 
386     /* create ScaLAPACK descriptor for vector (2d block distribution) */
387     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
388     PetscCheckScaLapackInfo("descinit",info);
389 
390     /* collect diagonal */
391     for (j=1;j<=a->M;j++) {
392       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394     }
395 
396     /* redistribute d from a column of a 2d matrix */
397     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398     ierr = PetscFree(d2d);CHKERRQ(ierr);
399 
400   } else {   /* column version */
401 
402     /* create ScaLAPACK descriptor for vector (1d block distribution) */
403     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
404     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
405     dlld = 1;
406     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
407     PetscCheckScaLapackInfo("descinit",info);
408 
409     /* allocate 2d vector */
410     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
412 
413     /* create ScaLAPACK descriptor for vector (2d block distribution) */
414     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
415     PetscCheckScaLapackInfo("descinit",info);
416 
417     /* collect diagonal */
418     for (j=1;j<=a->N;j++) {
419       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421     }
422 
423     /* redistribute d from a row of a 2d matrix */
424     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425     ierr = PetscFree(d2d);CHKERRQ(ierr);
426   }
427 
428   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
429   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
430   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435 {
436   PetscErrorCode    ierr;
437   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
438   const PetscScalar *d;
439   const PetscInt    *ranges;
440   PetscScalar       *d2d;
441   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
442 
443   PetscFunctionBegin;
444   if (R) {
445     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
446     /* create ScaLAPACK descriptor for vector (1d block distribution) */
447     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
448     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
449     dlld = 1;
450     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
451     PetscCheckScaLapackInfo("descinit",info);
452 
453     /* allocate 2d vector */
454     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
455     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
456 
457     /* create ScaLAPACK descriptor for vector (2d block distribution) */
458     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
459     PetscCheckScaLapackInfo("descinit",info);
460 
461     /* redistribute d to a row of a 2d matrix */
462     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
463 
464     /* broadcast along process columns */
465     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
466     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
467 
468     /* local scaling */
469     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
470 
471     ierr = PetscFree(d2d);CHKERRQ(ierr);
472     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
473   }
474   if (L) {
475     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
476     /* create ScaLAPACK descriptor for vector (1d block distribution) */
477     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
478     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
479     dlld = PetscMax(1,A->rmap->n);
480     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
481     PetscCheckScaLapackInfo("descinit",info);
482 
483     /* allocate 2d vector */
484     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
485     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
486     dlld = PetscMax(1,lszd);
487 
488     /* create ScaLAPACK descriptor for vector (2d block distribution) */
489     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
490     PetscCheckScaLapackInfo("descinit",info);
491 
492     /* redistribute d to a column of a 2d matrix */
493     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
494 
495     /* broadcast along process rows */
496     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
497     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
498 
499     /* local scaling */
500     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
501 
502     ierr = PetscFree(d2d);CHKERRQ(ierr);
503     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509 {
510   PetscFunctionBegin;
511   *missing = PETSC_FALSE;
512   PetscFunctionReturn(0);
513 }
514 
515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516 {
517   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518   PetscBLASInt  n,one=1;
519 
520   PetscFunctionBegin;
521   n = x->lld*x->locc;
522   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523   PetscFunctionReturn(0);
524 }
525 
526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527 {
528   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529   PetscBLASInt  i,n;
530   PetscScalar   v;
531 
532   PetscFunctionBegin;
533   n = PetscMin(x->M,x->N);
534   for (i=1;i<=n;i++) {
535     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536     v += alpha;
537     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543 {
544   PetscErrorCode ierr;
545   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
546   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
547   PetscBLASInt   one=1;
548   PetscScalar    beta=1.0;
549 
550   PetscFunctionBegin;
551   MatScaLAPACKCheckDistribution(Y,1,X,3);
552   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558 {
559   PetscErrorCode ierr;
560   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
562 
563   PetscFunctionBegin;
564   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570 {
571   Mat            Bs;
572   MPI_Comm       comm;
573   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581   b = (Mat_ScaLAPACK*)Bs->data;
582   b->M    = a->M;
583   b->N    = a->N;
584   b->mb   = a->mb;
585   b->nb   = a->nb;
586   b->rsrc = a->rsrc;
587   b->csrc = a->csrc;
588   ierr = MatSetUp(Bs);CHKERRQ(ierr);
589   *B = Bs;
590   if (op == MAT_COPY_VALUES) {
591     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592   }
593   Bs->assembled = PETSC_TRUE;
594   PetscFunctionReturn(0);
595 }
596 
597 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
598 {
599   PetscErrorCode ierr;
600   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
601   Mat            Bs = *B;
602   PetscBLASInt   one=1;
603   PetscScalar    sone=1.0,zero=0.0;
604 #if defined(PETSC_USE_COMPLEX)
605   PetscInt       i,j;
606 #endif
607 
608   PetscFunctionBegin;
609   if (reuse == MAT_INITIAL_MATRIX) {
610     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
611     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
612     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
613     ierr = MatSetUp(Bs);CHKERRQ(ierr);
614     *B = Bs;
615   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
616   b = (Mat_ScaLAPACK*)Bs->data;
617   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
618 #if defined(PETSC_USE_COMPLEX)
619   /* undo conjugation */
620   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
621 #endif
622   Bs->assembled = PETSC_TRUE;
623   PetscFunctionReturn(0);
624 }
625 
626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627 {
628   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629   PetscInt      i,j;
630 
631   PetscFunctionBegin;
632   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637 {
638   PetscErrorCode ierr;
639   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
640   Mat            Bs = *B;
641   PetscBLASInt   one=1;
642   PetscScalar    sone=1.0,zero=0.0;
643 
644   PetscFunctionBegin;
645   if (reuse == MAT_INITIAL_MATRIX) {
646     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
647     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
648     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
649     ierr = MatSetUp(Bs);CHKERRQ(ierr);
650     *B = Bs;
651   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652   b = (Mat_ScaLAPACK*)Bs->data;
653   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654   Bs->assembled = PETSC_TRUE;
655   PetscFunctionReturn(0);
656 }
657 
658 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
659 {
660   PetscErrorCode ierr;
661   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
662   PetscScalar    *x,*x2d;
663   const PetscInt *ranges;
664   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
665 
666   PetscFunctionBegin;
667   ierr = VecCopy(B,X);CHKERRQ(ierr);
668   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
669 
670   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
671   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
672   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
673   xlld = PetscMax(1,A->rmap->n);
674   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
675   PetscCheckScaLapackInfo("descinit",info);
676 
677   /* allocate 2d vector */
678   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
679   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
680   xlld = PetscMax(1,lszx);
681 
682   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
683   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
684   PetscCheckScaLapackInfo("descinit",info);
685 
686   /* redistribute x as a column of a 2d matrix */
687   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
688 
689   /* call ScaLAPACK subroutine */
690   switch (A->factortype) {
691     case MAT_FACTOR_LU:
692       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
693       PetscCheckScaLapackInfo("getrs",info);
694       break;
695     case MAT_FACTOR_CHOLESKY:
696       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
697       PetscCheckScaLapackInfo("potrs",info);
698       break;
699     default:
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
701       break;
702   }
703 
704   /* redistribute x from a column of a 2d matrix */
705   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
706 
707   ierr = PetscFree(x2d);CHKERRQ(ierr);
708   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713 {
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
718   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
723 {
724   PetscErrorCode ierr;
725   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
726   PetscBool      flg1,flg2;
727   PetscBLASInt   one=1,info;
728 
729   PetscFunctionBegin;
730   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
731   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
732   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
733   MatScaLAPACKCheckDistribution(B,1,X,2);
734   b = (Mat_ScaLAPACK*)B->data;
735   x = (Mat_ScaLAPACK*)X->data;
736   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
737 
738   switch (A->factortype) {
739     case MAT_FACTOR_LU:
740       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
741       PetscCheckScaLapackInfo("getrs",info);
742       break;
743     case MAT_FACTOR_CHOLESKY:
744       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
745       PetscCheckScaLapackInfo("potrs",info);
746       break;
747     default:
748       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
749       break;
750   }
751   PetscFunctionReturn(0);
752 }
753 
754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755 {
756   PetscErrorCode ierr;
757   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758   PetscBLASInt   one=1,info;
759 
760   PetscFunctionBegin;
761   if (!a->pivots) {
762     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
763     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
764   }
765   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766   PetscCheckScaLapackInfo("getrf",info);
767   A->factortype = MAT_FACTOR_LU;
768   A->assembled  = PETSC_TRUE;
769 
770   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
771   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776 {
777   PetscErrorCode ierr;
778 
779   PetscFunctionBegin;
780   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 
785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786 {
787   PetscFunctionBegin;
788   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789   PetscFunctionReturn(0);
790 }
791 
792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793 {
794   PetscErrorCode ierr;
795   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
796   PetscBLASInt   one=1,info;
797 
798   PetscFunctionBegin;
799   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
800   PetscCheckScaLapackInfo("potrf",info);
801   A->factortype = MAT_FACTOR_CHOLESKY;
802   A->assembled  = PETSC_TRUE;
803 
804   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
805   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810 {
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820 {
821   PetscFunctionBegin;
822   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
823   PetscFunctionReturn(0);
824 }
825 
826 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827 {
828   PetscFunctionBegin;
829   *type = MATSOLVERSCALAPACK;
830   PetscFunctionReturn(0);
831 }
832 
833 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834 {
835   Mat            B;
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   /* Create the factorization matrix */
840   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
841   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
842   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
843   ierr = MatSetUp(B);CHKERRQ(ierr);
844   B->factortype = ftype;
845   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
846   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
847 
848   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
849   *F = B;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864 {
865   PetscErrorCode ierr;
866   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
867   PetscBLASInt   one=1,lwork=0;
868   const char     *ntype;
869   PetscScalar    *work,dummy;
870 
871   PetscFunctionBegin;
872   switch (type){
873     case NORM_1:
874       ntype = "1";
875       lwork = PetscMax(a->locr,a->locc);
876       break;
877     case NORM_FROBENIUS:
878       ntype = "F";
879       work  = &dummy;
880       break;
881     case NORM_INFINITY:
882       ntype = "I";
883       lwork = PetscMax(a->locr,a->locc);
884       break;
885     default:
886       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887   }
888   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
889   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
891   PetscFunctionReturn(0);
892 }
893 
894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895 {
896   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905 {
906   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
909 
910   PetscFunctionBegin;
911   if (rows) {
912     n     = a->locr;
913     nb    = a->mb;
914     isrc  = a->rsrc;
915     nproc = a->grid->nprow;
916     iproc = a->grid->myrow;
917     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
918     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
920   }
921   if (cols) {
922     n     = a->locc;
923     nb    = a->nb;
924     isrc  = a->csrc;
925     nproc = a->grid->npcol;
926     iproc = a->grid->mycol;
927     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
935 {
936   PetscErrorCode ierr;
937   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
938   Mat            Bmpi;
939   MPI_Comm       comm;
940   const PetscInt *ranges;
941   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
942   PetscScalar    *barray;
943 
944   PetscFunctionBegin;
945   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
946 
947   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
948   else {
949     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
950     ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
951     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
952     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
953   }
954 
955   /* create ScaLAPACK descriptor for B (1d block distribution) */
956   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
957   ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
958   lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
959   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
960   PetscCheckScaLapackInfo("descinit",info);
961 
962   /* redistribute matrix */
963   ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
964   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
965   ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
966 
967   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969   if (reuse == MAT_INPLACE_MATRIX) {
970     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
971   } else *B = Bmpi;
972   PetscFunctionReturn(0);
973 }
974 
975 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
976 {
977   PetscErrorCode ierr;
978   Mat_ScaLAPACK  *b;
979   Mat            Bmpi;
980   MPI_Comm       comm;
981   const PetscInt *ranges;
982   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
983   PetscScalar    *aarray;
984   PetscInt       lda;
985 
986   PetscFunctionBegin;
987   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
988 
989   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
990   else {
991     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
992     ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
993     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
994     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
995   }
996   b = (Mat_ScaLAPACK*)Bmpi->data;
997 
998   /* create ScaLAPACK descriptor for A (1d block distribution) */
999   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
1000   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
1001   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1002   lld = PetscMax(lda,1);  /* local leading dimension */
1003   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1004   PetscCheckScaLapackInfo("descinit",info);
1005 
1006   /* redistribute matrix */
1007   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1008   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1009   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1010 
1011   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1012   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1013   if (reuse == MAT_INPLACE_MATRIX) {
1014     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1015   } else *B = Bmpi;
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1020 {
1021   Mat               mat_scal;
1022   PetscErrorCode    ierr;
1023   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1024   const PetscInt    *cols;
1025   const PetscScalar *vals;
1026 
1027   PetscFunctionBegin;
1028   if (reuse == MAT_REUSE_MATRIX) {
1029     mat_scal = *newmat;
1030     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1031   } else {
1032     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1033     m = PETSC_DECIDE;
1034     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1035     n = PETSC_DECIDE;
1036     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1037     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1038     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1039     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1040   }
1041   for (row=rstart;row<rend;row++) {
1042     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1043     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1044     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1045   }
1046   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1047   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1048 
1049   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1050   else *newmat = mat_scal;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1055 {
1056   Mat               mat_scal;
1057   PetscErrorCode    ierr;
1058   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1059   const PetscInt    *cols;
1060   const PetscScalar *vals;
1061   PetscScalar       v;
1062 
1063   PetscFunctionBegin;
1064   if (reuse == MAT_REUSE_MATRIX) {
1065     mat_scal = *newmat;
1066     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1067   } else {
1068     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1069     m = PETSC_DECIDE;
1070     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1071     n = PETSC_DECIDE;
1072     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1073     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1074     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1075     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1076   }
1077   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1078   for (row=rstart;row<rend;row++) {
1079     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1080     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1081     for (j=0;j<ncols;j++) { /* lower triangular part */
1082       if (cols[j] == row) continue;
1083       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1084       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1085     }
1086     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1087   }
1088   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1089   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1090   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1091 
1092   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1093   else *newmat = mat_scal;
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1098 {
1099   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1100   PetscErrorCode ierr;
1101   PetscInt       sz=0;
1102 
1103   PetscFunctionBegin;
1104   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1105   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1106   if (!a->lld) a->lld = a->locr;
1107 
1108   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1109   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1110   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1111   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1112 
1113   A->preallocated = PETSC_TRUE;
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1118 {
1119   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1120   PetscErrorCode     ierr;
1121   Mat_ScaLAPACK_Grid *grid;
1122   PetscBool          flg;
1123   MPI_Comm           icomm;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1127   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1128   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1129   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1130   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1131   if (--grid->grid_refct == 0) {
1132     Cblacs_gridexit(grid->ictxt);
1133     Cblacs_gridexit(grid->ictxrow);
1134     Cblacs_gridexit(grid->ictxcol);
1135     ierr = PetscFree(grid);CHKERRQ(ierr);
1136     ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1137   }
1138   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscFree(A->data);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1148 {
1149   PetscErrorCode ierr;
1150   const PetscInt *ranges;
1151   PetscMPIInt    size;
1152   PetscInt       i,n;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
1156   if (size>2) {
1157     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1158     n = ranges[1]-ranges[0];
1159     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1160     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1161   }
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1166 {
1167   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1168   PetscErrorCode ierr;
1169   PetscBLASInt   info=0;
1170 
1171   PetscFunctionBegin;
1172   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1173   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1174 
1175   /* check that the layout is as enforced by MatCreateScaLAPACK */
1176   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1177   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1178 
1179   /* compute local sizes */
1180   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1181   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1182   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1183   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1184   a->lld  = PetscMax(1,a->locr);
1185 
1186   /* allocate local array */
1187   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1188 
1189   /* set up ScaLAPACK descriptor */
1190   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1191   PetscCheckScaLapackInfo("descinit",info);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1196 {
1197   PetscErrorCode ierr;
1198   PetscInt       nstash,reallocs;
1199 
1200   PetscFunctionBegin;
1201   if (A->nooffprocentries) PetscFunctionReturn(0);
1202   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1203   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1204   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1209 {
1210   PetscErrorCode ierr;
1211   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1212   PetscMPIInt    n;
1213   PetscInt       i,flg,*row,*col;
1214   PetscScalar    *val;
1215   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1216 
1217   PetscFunctionBegin;
1218   if (A->nooffprocentries) PetscFunctionReturn(0);
1219   while (1) {
1220     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1221     if (!flg) break;
1222     for (i=0;i<n;i++) {
1223       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1224       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1225       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1226       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1227       switch (A->insertmode) {
1228         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1229         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1230         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1231       }
1232     }
1233   }
1234   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1239 {
1240   PetscErrorCode ierr;
1241   Mat            Adense,As;
1242   MPI_Comm       comm;
1243 
1244   PetscFunctionBegin;
1245   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1246   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1247   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1248   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1249   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1250   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1251   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 /* -------------------------------------------------------------------*/
1256 static struct _MatOps MatOps_Values = {
1257        MatSetValues_ScaLAPACK,
1258        0,
1259        0,
1260        MatMult_ScaLAPACK,
1261 /* 4*/ MatMultAdd_ScaLAPACK,
1262        MatMultTranspose_ScaLAPACK,
1263        MatMultTransposeAdd_ScaLAPACK,
1264        MatSolve_ScaLAPACK,
1265        MatSolveAdd_ScaLAPACK,
1266        0,
1267 /*10*/ 0,
1268        MatLUFactor_ScaLAPACK,
1269        MatCholeskyFactor_ScaLAPACK,
1270        0,
1271        MatTranspose_ScaLAPACK,
1272 /*15*/ MatGetInfo_ScaLAPACK,
1273        0,
1274        MatGetDiagonal_ScaLAPACK,
1275        MatDiagonalScale_ScaLAPACK,
1276        MatNorm_ScaLAPACK,
1277 /*20*/ MatAssemblyBegin_ScaLAPACK,
1278        MatAssemblyEnd_ScaLAPACK,
1279        MatSetOption_ScaLAPACK,
1280        MatZeroEntries_ScaLAPACK,
1281 /*24*/ 0,
1282        MatLUFactorSymbolic_ScaLAPACK,
1283        MatLUFactorNumeric_ScaLAPACK,
1284        MatCholeskyFactorSymbolic_ScaLAPACK,
1285        MatCholeskyFactorNumeric_ScaLAPACK,
1286 /*29*/ MatSetUp_ScaLAPACK,
1287        0,
1288        0,
1289        0,
1290        0,
1291 /*34*/ MatDuplicate_ScaLAPACK,
1292        0,
1293        0,
1294        0,
1295        0,
1296 /*39*/ MatAXPY_ScaLAPACK,
1297        0,
1298        0,
1299        0,
1300        MatCopy_ScaLAPACK,
1301 /*44*/ 0,
1302        MatScale_ScaLAPACK,
1303        MatShift_ScaLAPACK,
1304        0,
1305        0,
1306 /*49*/ 0,
1307        0,
1308        0,
1309        0,
1310        0,
1311 /*54*/ 0,
1312        0,
1313        0,
1314        0,
1315        0,
1316 /*59*/ 0,
1317        MatDestroy_ScaLAPACK,
1318        MatView_ScaLAPACK,
1319        0,
1320        0,
1321 /*64*/ 0,
1322        0,
1323        0,
1324        0,
1325        0,
1326 /*69*/ 0,
1327        0,
1328        MatConvert_ScaLAPACK_Dense,
1329        0,
1330        0,
1331 /*74*/ 0,
1332        0,
1333        0,
1334        0,
1335        0,
1336 /*79*/ 0,
1337        0,
1338        0,
1339        0,
1340        MatLoad_ScaLAPACK,
1341 /*84*/ 0,
1342        0,
1343        0,
1344        0,
1345        0,
1346 /*89*/ 0,
1347        0,
1348        MatMatMultNumeric_ScaLAPACK,
1349        0,
1350        0,
1351 /*94*/ 0,
1352        0,
1353        0,
1354        MatMatTransposeMultNumeric_ScaLAPACK,
1355        0,
1356 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1357        0,
1358        0,
1359        MatConjugate_ScaLAPACK,
1360        0,
1361 /*104*/0,
1362        0,
1363        0,
1364        0,
1365        0,
1366 /*109*/MatMatSolve_ScaLAPACK,
1367        0,
1368        0,
1369        0,
1370        MatMissingDiagonal_ScaLAPACK,
1371 /*114*/0,
1372        0,
1373        0,
1374        0,
1375        0,
1376 /*119*/0,
1377        MatHermitianTranspose_ScaLAPACK,
1378        0,
1379        0,
1380        0,
1381 /*124*/0,
1382        0,
1383        0,
1384        0,
1385        0,
1386 /*129*/0,
1387        0,
1388        0,
1389        0,
1390        0,
1391 /*134*/0,
1392        0,
1393        0,
1394        0,
1395        0,
1396        0,
1397 /*140*/0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*145*/0,
1403        0,
1404        0
1405 };
1406 
1407 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1408 {
1409   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1410   PetscInt           size=stash->size,nsends;
1411   PetscErrorCode     ierr;
1412   PetscInt           count,*sindices,**rindices,i,j,l;
1413   PetscScalar        **rvalues,*svalues;
1414   MPI_Comm           comm = stash->comm;
1415   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1416   PetscMPIInt        *sizes,*nlengths,nreceives;
1417   PetscInt           *sp_idx,*sp_idy;
1418   PetscScalar        *sp_val;
1419   PetscMatStashSpace space,space_next;
1420   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1421   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1422 
1423   PetscFunctionBegin;
1424   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1425     InsertMode addv;
1426     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1427     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1428     mat->insertmode = addv; /* in case this processor had no cache */
1429   }
1430 
1431   bs2 = stash->bs*stash->bs;
1432 
1433   /*  first count number of contributors to each processor */
1434   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1435   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1436 
1437   i     = j    = 0;
1438   space = stash->space_head;
1439   while (space) {
1440     space_next = space->next;
1441     for (l=0; l<space->local_used; l++) {
1442       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1443       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1444       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1445       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1446       nlengths[j]++; owner[i] = j;
1447       i++;
1448     }
1449     space = space_next;
1450   }
1451 
1452   /* Now check what procs get messages - and compute nsends. */
1453   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1454   for (i=0, nsends=0; i<size; i++) {
1455     if (nlengths[i]) {
1456       sizes[i] = 1; nsends++;
1457     }
1458   }
1459 
1460   {PetscMPIInt *onodes,*olengths;
1461    /* Determine the number of messages to expect, their lengths, from from-ids */
1462    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1463    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1464    /* since clubbing row,col - lengths are multiplied by 2 */
1465    for (i=0; i<nreceives; i++) olengths[i] *=2;
1466    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1467    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1468    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1469    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1470    ierr = PetscFree(onodes);CHKERRQ(ierr);
1471    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1472 
1473   /* do sends:
1474       1) starts[i] gives the starting index in svalues for stuff going to
1475          the ith processor
1476   */
1477   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1478   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1479   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1480   /* use 2 sends the first with all_a, the next with all_i and all_j */
1481   startv[0] = 0; starti[0] = 0;
1482   for (i=1; i<size; i++) {
1483     startv[i] = startv[i-1] + nlengths[i-1];
1484     starti[i] = starti[i-1] + 2*nlengths[i-1];
1485   }
1486 
1487   i     = 0;
1488   space = stash->space_head;
1489   while (space) {
1490     space_next = space->next;
1491     sp_idx     = space->idx;
1492     sp_idy     = space->idy;
1493     sp_val     = space->val;
1494     for (l=0; l<space->local_used; l++) {
1495       j = owner[i];
1496       if (bs2 == 1) {
1497         svalues[startv[j]] = sp_val[l];
1498       } else {
1499         PetscInt    k;
1500         PetscScalar *buf1,*buf2;
1501         buf1 = svalues+bs2*startv[j];
1502         buf2 = space->val + bs2*l;
1503         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1504       }
1505       sindices[starti[j]]             = sp_idx[l];
1506       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1507       startv[j]++;
1508       starti[j]++;
1509       i++;
1510     }
1511     space = space_next;
1512   }
1513   startv[0] = 0;
1514   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1515 
1516   for (i=0,count=0; i<size; i++) {
1517     if (sizes[i]) {
1518       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1519       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1520     }
1521   }
1522 #if defined(PETSC_USE_INFO)
1523   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1524   for (i=0; i<size; i++) {
1525     if (sizes[i]) {
1526       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1527     }
1528   }
1529 #endif
1530   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1531   ierr = PetscFree(owner);CHKERRQ(ierr);
1532   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1533   ierr = PetscFree(sizes);CHKERRQ(ierr);
1534 
1535   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1536   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1537 
1538   for (i=0; i<nreceives; i++) {
1539     recv_waits[2*i]   = recv_waits1[i];
1540     recv_waits[2*i+1] = recv_waits2[i];
1541   }
1542   stash->recv_waits = recv_waits;
1543 
1544   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1545   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1546 
1547   stash->svalues         = svalues;
1548   stash->sindices        = sindices;
1549   stash->rvalues         = rvalues;
1550   stash->rindices        = rindices;
1551   stash->send_waits      = send_waits;
1552   stash->nsends          = nsends;
1553   stash->nrecvs          = nreceives;
1554   stash->reproduce_count = 0;
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1559 {
1560   PetscErrorCode ierr;
1561   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1562 
1563   PetscFunctionBegin;
1564   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1565   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1566   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1567   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1568   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 /*@
1573    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1574    the ScaLAPACK matrix
1575 
1576    Logically Collective on A
1577 
1578    Input Parameter:
1579 +  A  - a MATSCALAPACK matrix
1580 .  mb - the row block size
1581 -  nb - the column block size
1582 
1583    Level: intermediate
1584 
1585 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1586 @*/
1587 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1588 {
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1593   PetscValidLogicalCollectiveInt(A,mb,2);
1594   PetscValidLogicalCollectiveInt(A,nb,3);
1595   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1600 {
1601   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1602 
1603   PetscFunctionBegin;
1604   if (mb) *mb = a->mb;
1605   if (nb) *nb = a->nb;
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 /*@
1610    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1611    the ScaLAPACK matrix
1612 
1613    Not collective
1614 
1615    Input Parameter:
1616 .  A  - a MATSCALAPACK matrix
1617 
1618    Output Parameters:
1619 +  mb - the row block size
1620 -  nb - the column block size
1621 
1622    Level: intermediate
1623 
1624 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1625 @*/
1626 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1627 {
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1632   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1637 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1638 
1639 /*MC
1640    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1641 
1642    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1643 
1644    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1645 
1646    Options Database Keys:
1647 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1648 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1649 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1650 
1651    Level: beginner
1652 
1653 .seealso: MATDENSE, MATELEMENTAL
1654 M*/
1655 
1656 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1657 {
1658   Mat_ScaLAPACK      *a;
1659   PetscErrorCode     ierr;
1660   PetscBool          flg,flg1;
1661   Mat_ScaLAPACK_Grid *grid;
1662   MPI_Comm           icomm;
1663   PetscBLASInt       nprow,npcol,myrow,mycol;
1664   PetscInt           optv1,k=2,array[2]={0,0};
1665   PetscMPIInt        size;
1666 
1667   PetscFunctionBegin;
1668   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1669   A->insertmode = NOT_SET_VALUES;
1670 
1671   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1672   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1673   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1674   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1675   A->stash.ScatterDestroy = NULL;
1676 
1677   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1678   A->data = (void*)a;
1679 
1680   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1681   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1682     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1683   }
1684   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1685   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1686   if (!flg) {
1687     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1688 
1689     ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1690     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1691 
1692     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1693     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1694     if (flg1) {
1695       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1696       grid->nprow = optv1;
1697     }
1698     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1699 
1700     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1701     grid->npcol = size/grid->nprow;
1702     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1703     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1704     Cblacs_get(-1,0,&grid->ictxt);
1705     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1706     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1707     grid->grid_refct = 1;
1708     grid->nprow      = nprow;
1709     grid->npcol      = npcol;
1710     grid->myrow      = myrow;
1711     grid->mycol      = mycol;
1712     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1713     Cblacs_get(-1,0,&grid->ictxrow);
1714     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1715     Cblacs_get(-1,0,&grid->ictxcol);
1716     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1717     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1718 
1719   } else grid->grid_refct++;
1720   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1721   a->grid = grid;
1722   a->mb   = DEFAULT_BLOCKSIZE;
1723   a->nb   = DEFAULT_BLOCKSIZE;
1724 
1725   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1726   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1727   if (flg) {
1728     a->mb = array[0];
1729     a->nb = (k>1)? array[1]: a->mb;
1730   }
1731   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1732 
1733   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1734   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1735   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1736   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 /*@C
1741    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1742    (2D block cyclic distribution).
1743 
1744    Collective
1745 
1746    Input Parameters:
1747 +  comm - MPI communicator
1748 .  mb   - row block size (or PETSC_DECIDE to have it set)
1749 .  nb   - column block size (or PETSC_DECIDE to have it set)
1750 .  M    - number of global rows
1751 .  N    - number of global columns
1752 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1753 -  csrc - coordinate of process that owns the first column of the distributed matrix
1754 
1755    Output Parameter:
1756 .  A - the matrix
1757 
1758    Options Database Keys:
1759 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1760 
1761    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1762    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1763    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1764 
1765    Notes:
1766    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1767    is chosen.
1768 
1769    Storage Information:
1770    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1771    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1772    significance and are thus ignored. The block sizes refer to the values
1773    used for the distributed matrix, not the same meaning as in BAIJ.
1774 
1775    Level: intermediate
1776 
1777 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1778 @*/
1779 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1780 {
1781   PetscErrorCode ierr;
1782   Mat_ScaLAPACK  *a;
1783   PetscInt       m,n;
1784 
1785   PetscFunctionBegin;
1786   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1787   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1788   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1789   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1790   m = PETSC_DECIDE;
1791   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1792   n = PETSC_DECIDE;
1793   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1794   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1795   a = (Mat_ScaLAPACK*)(*A)->data;
1796   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1797   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1798   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1799   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1800   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1801   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1802   ierr = MatSetUp(*A);CHKERRQ(ierr);
1803   PetscFunctionReturn(0);
1804 }
1805 
1806