xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 92c846b446c68637aff160714f7520ec87559fb6)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12 {
13   PetscErrorCode    ierr;
14   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
15   PetscBool         iascii;
16   PetscViewerFormat format;
17   Mat               Adense;
18 
19   PetscFunctionBegin;
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21   if (iascii) {
22     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28       PetscFunctionReturn(0);
29     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30       PetscFunctionReturn(0);
31     }
32   }
33   /* convert to dense format and call MatView() */
34   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37   PetscFunctionReturn(0);
38 }
39 
40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41 {
42   PetscErrorCode ierr;
43   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
44   PetscLogDouble isend[2],irecv[2];
45 
46   PetscFunctionBegin;
47   info->block_size = 1.0;
48 
49   isend[0] = a->lld*a->locc;     /* locally allocated */
50   isend[1] = a->locr*a->locc;    /* used submatrix */
51   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52     info->nz_allocated   = isend[0];
53     info->nz_used        = isend[1];
54   } else if (flag == MAT_GLOBAL_MAX) {
55     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56     info->nz_allocated   = irecv[0];
57     info->nz_used        = irecv[1];
58   } else if (flag == MAT_GLOBAL_SUM) {
59     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60     info->nz_allocated   = irecv[0];
61     info->nz_used        = irecv[1];
62   }
63 
64   info->nz_unneeded       = 0;
65   info->assemblies        = A->num_ass;
66   info->mallocs           = 0;
67   info->memory            = ((PetscObject)A)->mem;
68   info->fill_ratio_given  = 0;
69   info->fill_ratio_needed = 0;
70   info->factor_mallocs    = 0;
71   PetscFunctionReturn(0);
72 }
73 
74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75 {
76   PetscFunctionBegin;
77   switch (op) {
78     case MAT_NEW_NONZERO_LOCATIONS:
79     case MAT_NEW_NONZERO_LOCATION_ERR:
80     case MAT_NEW_NONZERO_ALLOCATION_ERR:
81     case MAT_SYMMETRIC:
82     case MAT_SORTED_FULL:
83     case MAT_HERMITIAN:
84       break;
85     default:
86       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
92 {
93   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j;
96   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
97 
98   PetscFunctionBegin;
99   for (i=0;i<nr;i++) {
100     if (rows[i] < 0) continue;
101     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
102     for (j=0;j<nc;j++) {
103       if (cols[j] < 0) continue;
104       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
105       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
106       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
107         switch (imode) {
108           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
109           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
110           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
111         }
112       } else {
113         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
114         A->assembled = PETSC_FALSE;
115         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
116       }
117     }
118   }
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
123 {
124   PetscErrorCode ierr;
125   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
126   PetscScalar    *x2d,*y2d,alpha=1.0;
127   const PetscInt *ranges;
128   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
129 
130   PetscFunctionBegin;
131   if (transpose) {
132 
133     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
134     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
135     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
136     xlld = PetscMax(1,A->rmap->n);
137     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
138     PetscCheckScaLapackInfo("descinit",info);
139     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
140     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
141     ylld = 1;
142     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
143     PetscCheckScaLapackInfo("descinit",info);
144 
145     /* allocate 2d vectors */
146     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
147     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
148     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
149     xlld = PetscMax(1,lszx);
150 
151     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
152     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
153     PetscCheckScaLapackInfo("descinit",info);
154     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
155     PetscCheckScaLapackInfo("descinit",info);
156 
157     /* redistribute x as a column of a 2d matrix */
158     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
159 
160     /* redistribute y as a row of a 2d matrix */
161     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
162 
163     /* call PBLAS subroutine */
164     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
165 
166     /* redistribute y from a row of a 2d matrix */
167     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
168 
169   } else {   /* non-transpose */
170 
171     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
172     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
173     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
174     xlld = 1;
175     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
176     PetscCheckScaLapackInfo("descinit",info);
177     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
178     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
179     ylld = PetscMax(1,A->rmap->n);
180     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
181     PetscCheckScaLapackInfo("descinit",info);
182 
183     /* allocate 2d vectors */
184     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
185     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
186     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
187     ylld = PetscMax(1,lszy);
188 
189     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
190     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
191     PetscCheckScaLapackInfo("descinit",info);
192     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
193     PetscCheckScaLapackInfo("descinit",info);
194 
195     /* redistribute x as a row of a 2d matrix */
196     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
197 
198     /* redistribute y as a column of a 2d matrix */
199     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
200 
201     /* call PBLAS subroutine */
202     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
203 
204     /* redistribute y from a column of a 2d matrix */
205     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
206 
207   }
208   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213 {
214   PetscErrorCode    ierr;
215   const PetscScalar *xarray;
216   PetscScalar       *yarray;
217 
218   PetscFunctionBegin;
219   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   PetscErrorCode    ierr;
230   const PetscScalar *xarray;
231   PetscScalar       *yarray;
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243 {
244   PetscErrorCode    ierr;
245   const PetscScalar *xarray;
246   PetscScalar       *zarray;
247 
248   PetscFunctionBegin;
249   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259 {
260   PetscErrorCode    ierr;
261   const PetscScalar *xarray;
262   PetscScalar       *zarray;
263 
264   PetscFunctionBegin;
265   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275 {
276   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279   PetscScalar   sone=1.0,zero=0.0;
280   PetscBLASInt  one=1;
281 
282   PetscFunctionBegin;
283   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284   C->assembled = PETSC_TRUE;
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289 {
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
294   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
295   ierr = MatSetUp(C);CHKERRQ(ierr);
296   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297   PetscFunctionReturn(0);
298 }
299 
300 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301 {
302   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305   PetscScalar   sone=1.0,zero=0.0;
306   PetscBLASInt  one=1;
307 
308   PetscFunctionBegin;
309   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310   C->assembled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315 {
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321   ierr = MatSetUp(C);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327 {
328   PetscFunctionBegin;
329   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330   C->ops->productsymbolic = MatProductSymbolic_AB;
331   PetscFunctionReturn(0);
332 }
333 
334 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335 {
336   PetscFunctionBegin;
337   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338   C->ops->productsymbolic          = MatProductSymbolic_ABt;
339   PetscFunctionReturn(0);
340 }
341 
342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343 {
344   PetscErrorCode ierr;
345   Mat_Product    *product = C->product;
346 
347   PetscFunctionBegin;
348   switch (product->type) {
349     case MATPRODUCT_AB:
350       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
351       break;
352     case MATPRODUCT_ABt:
353       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
354       break;
355     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356   }
357   PetscFunctionReturn(0);
358 }
359 /* --------------------------------------- */
360 
361 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
362 {
363   PetscErrorCode    ierr;
364   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
365   PetscScalar       *darray,*d2d,v;
366   const PetscInt    *ranges;
367   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
368 
369   PetscFunctionBegin;
370   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
371 
372   if (A->rmap->N<=A->cmap->N) {   /* row version */
373 
374     /* create ScaLAPACK descriptor for vector (1d block distribution) */
375     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
376     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
377     dlld = PetscMax(1,A->rmap->n);
378     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
379     PetscCheckScaLapackInfo("descinit",info);
380 
381     /* allocate 2d vector */
382     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
384     dlld = PetscMax(1,lszd);
385 
386     /* create ScaLAPACK descriptor for vector (2d block distribution) */
387     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
388     PetscCheckScaLapackInfo("descinit",info);
389 
390     /* collect diagonal */
391     for (j=1;j<=a->M;j++) {
392       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394     }
395 
396     /* redistribute d from a column of a 2d matrix */
397     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398     ierr = PetscFree(d2d);CHKERRQ(ierr);
399 
400   } else {   /* column version */
401 
402     /* create ScaLAPACK descriptor for vector (1d block distribution) */
403     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
404     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
405     dlld = 1;
406     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
407     PetscCheckScaLapackInfo("descinit",info);
408 
409     /* allocate 2d vector */
410     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
412 
413     /* create ScaLAPACK descriptor for vector (2d block distribution) */
414     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
415     PetscCheckScaLapackInfo("descinit",info);
416 
417     /* collect diagonal */
418     for (j=1;j<=a->N;j++) {
419       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421     }
422 
423     /* redistribute d from a row of a 2d matrix */
424     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425     ierr = PetscFree(d2d);CHKERRQ(ierr);
426   }
427 
428   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
429   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
430   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435 {
436   PetscErrorCode    ierr;
437   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
438   const PetscScalar *d;
439   const PetscInt    *ranges;
440   PetscScalar       *d2d;
441   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
442 
443   PetscFunctionBegin;
444   if (R) {
445     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
446     /* create ScaLAPACK descriptor for vector (1d block distribution) */
447     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
448     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
449     dlld = 1;
450     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
451     PetscCheckScaLapackInfo("descinit",info);
452 
453     /* allocate 2d vector */
454     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
455     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
456 
457     /* create ScaLAPACK descriptor for vector (2d block distribution) */
458     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
459     PetscCheckScaLapackInfo("descinit",info);
460 
461     /* redistribute d to a row of a 2d matrix */
462     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
463 
464     /* broadcast along process columns */
465     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
466     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
467 
468     /* local scaling */
469     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
470 
471     ierr = PetscFree(d2d);CHKERRQ(ierr);
472     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
473   }
474   if (L) {
475     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
476     /* create ScaLAPACK descriptor for vector (1d block distribution) */
477     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
478     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
479     dlld = PetscMax(1,A->rmap->n);
480     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
481     PetscCheckScaLapackInfo("descinit",info);
482 
483     /* allocate 2d vector */
484     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
485     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
486     dlld = PetscMax(1,lszd);
487 
488     /* create ScaLAPACK descriptor for vector (2d block distribution) */
489     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
490     PetscCheckScaLapackInfo("descinit",info);
491 
492     /* redistribute d to a column of a 2d matrix */
493     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
494 
495     /* broadcast along process rows */
496     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
497     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
498 
499     /* local scaling */
500     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
501 
502     ierr = PetscFree(d2d);CHKERRQ(ierr);
503     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509 {
510   PetscFunctionBegin;
511   *missing = PETSC_FALSE;
512   PetscFunctionReturn(0);
513 }
514 
515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516 {
517   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518   PetscBLASInt  n,one=1;
519 
520   PetscFunctionBegin;
521   n = x->lld*x->locc;
522   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523   PetscFunctionReturn(0);
524 }
525 
526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527 {
528   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529   PetscBLASInt  i,n;
530   PetscScalar   v;
531 
532   PetscFunctionBegin;
533   n = PetscMin(x->M,x->N);
534   for (i=1;i<=n;i++) {
535     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536     v += alpha;
537     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543 {
544   PetscErrorCode ierr;
545   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
546   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
547   PetscBLASInt   one=1;
548   PetscScalar    beta=1.0;
549 
550   PetscFunctionBegin;
551   MatScaLAPACKCheckDistribution(Y,1,X,3);
552   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558 {
559   PetscErrorCode ierr;
560   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
562 
563   PetscFunctionBegin;
564   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570 {
571   Mat            Bs;
572   MPI_Comm       comm;
573   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581   b = (Mat_ScaLAPACK*)Bs->data;
582   b->M    = a->M;
583   b->N    = a->N;
584   b->mb   = a->mb;
585   b->nb   = a->nb;
586   b->rsrc = a->rsrc;
587   b->csrc = a->csrc;
588   ierr = MatSetUp(Bs);CHKERRQ(ierr);
589   *B = Bs;
590   if (op == MAT_COPY_VALUES) {
591     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592   }
593   Bs->assembled = PETSC_TRUE;
594   PetscFunctionReturn(0);
595 }
596 
597 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
598 {
599   PetscErrorCode ierr;
600   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
601   Mat            Bs = *B;
602   PetscBLASInt   one=1;
603   PetscScalar    sone=1.0,zero=0.0;
604 #if defined(PETSC_USE_COMPLEX)
605   PetscInt       i,j;
606 #endif
607 
608   PetscFunctionBegin;
609   if (reuse == MAT_INITIAL_MATRIX) {
610     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
611     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
612     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
613     ierr = MatSetUp(Bs);CHKERRQ(ierr);
614     *B = Bs;
615   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
616   b = (Mat_ScaLAPACK*)Bs->data;
617   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
618 #if defined(PETSC_USE_COMPLEX)
619   /* undo conjugation */
620   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
621 #endif
622   Bs->assembled = PETSC_TRUE;
623   PetscFunctionReturn(0);
624 }
625 
626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627 {
628   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629   PetscInt      i,j;
630 
631   PetscFunctionBegin;
632   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637 {
638   PetscErrorCode ierr;
639   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
640   Mat            Bs = *B;
641   PetscBLASInt   one=1;
642   PetscScalar    sone=1.0,zero=0.0;
643 
644   PetscFunctionBegin;
645   if (reuse == MAT_INITIAL_MATRIX) {
646     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
647     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
648     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
649     ierr = MatSetUp(Bs);CHKERRQ(ierr);
650     *B = Bs;
651   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652   b = (Mat_ScaLAPACK*)Bs->data;
653   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654   Bs->assembled = PETSC_TRUE;
655   PetscFunctionReturn(0);
656 }
657 
658 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
659 {
660   PetscErrorCode ierr;
661   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
662   PetscScalar    *x,*x2d;
663   const PetscInt *ranges;
664   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
665 
666   PetscFunctionBegin;
667   ierr = VecCopy(B,X);CHKERRQ(ierr);
668   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
669 
670   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
671   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
672   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
673   xlld = PetscMax(1,A->rmap->n);
674   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
675   PetscCheckScaLapackInfo("descinit",info);
676 
677   /* allocate 2d vector */
678   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
679   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
680   xlld = PetscMax(1,lszx);
681 
682   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
683   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
684   PetscCheckScaLapackInfo("descinit",info);
685 
686   /* redistribute x as a column of a 2d matrix */
687   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
688 
689   /* call ScaLAPACK subroutine */
690   switch (A->factortype) {
691     case MAT_FACTOR_LU:
692       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
693       PetscCheckScaLapackInfo("getrs",info);
694       break;
695     case MAT_FACTOR_CHOLESKY:
696       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
697       PetscCheckScaLapackInfo("potrs",info);
698       break;
699     default:
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
701       break;
702   }
703 
704   /* redistribute x from a column of a 2d matrix */
705   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
706 
707   ierr = PetscFree(x2d);CHKERRQ(ierr);
708   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713 {
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
718   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
723 {
724   PetscErrorCode ierr;
725   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
726   PetscBool      flg1,flg2;
727   PetscBLASInt   one=1,info;
728 
729   PetscFunctionBegin;
730   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
731   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
732   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
733   MatScaLAPACKCheckDistribution(B,1,X,2);
734   b = (Mat_ScaLAPACK*)B->data;
735   x = (Mat_ScaLAPACK*)X->data;
736   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
737 
738   switch (A->factortype) {
739     case MAT_FACTOR_LU:
740       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
741       PetscCheckScaLapackInfo("getrs",info);
742       break;
743     case MAT_FACTOR_CHOLESKY:
744       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
745       PetscCheckScaLapackInfo("potrs",info);
746       break;
747     default:
748       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
749       break;
750   }
751   PetscFunctionReturn(0);
752 }
753 
754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755 {
756   PetscErrorCode ierr;
757   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758   PetscBLASInt   one=1,info;
759 
760   PetscFunctionBegin;
761   if (!a->pivots) {
762     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
763     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
764   }
765   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766   PetscCheckScaLapackInfo("getrf",info);
767   A->factortype = MAT_FACTOR_LU;
768   A->assembled  = PETSC_TRUE;
769 
770   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
771   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776 {
777   PetscErrorCode ierr;
778 
779   PetscFunctionBegin;
780   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 
785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786 {
787   PetscFunctionBegin;
788   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789   PetscFunctionReturn(0);
790 }
791 
792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793 {
794   PetscErrorCode ierr;
795   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
796   PetscBLASInt   one=1,info;
797 
798   PetscFunctionBegin;
799   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
800   PetscCheckScaLapackInfo("potrf",info);
801   A->factortype = MAT_FACTOR_CHOLESKY;
802   A->assembled  = PETSC_TRUE;
803 
804   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
805   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810 {
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820 {
821   PetscFunctionBegin;
822   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
823   PetscFunctionReturn(0);
824 }
825 
826 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827 {
828   PetscFunctionBegin;
829   *type = MATSOLVERSCALAPACK;
830   PetscFunctionReturn(0);
831 }
832 
833 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834 {
835   Mat            B;
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   /* Create the factorization matrix */
840   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
841   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
842   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
843   ierr = MatSetUp(B);CHKERRQ(ierr);
844   B->factortype = ftype;
845   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
846   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
847 
848   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
849   *F = B;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864 {
865   PetscErrorCode ierr;
866   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
867   PetscBLASInt   one=1,lwork=0;
868   const char     *ntype;
869   PetscScalar    *work,dummy;
870 
871   PetscFunctionBegin;
872   switch (type){
873     case NORM_1:
874       ntype = "1";
875       lwork = PetscMax(a->locr,a->locc);
876       break;
877     case NORM_FROBENIUS:
878       ntype = "F";
879       work  = &dummy;
880       break;
881     case NORM_INFINITY:
882       ntype = "I";
883       lwork = PetscMax(a->locr,a->locc);
884       break;
885     default:
886       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887   }
888   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
889   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
891   PetscFunctionReturn(0);
892 }
893 
894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895 {
896   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905 {
906   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
909 
910   PetscFunctionBegin;
911   if (rows) {
912     n     = a->locr;
913     nb    = a->mb;
914     isrc  = a->rsrc;
915     nproc = a->grid->nprow;
916     iproc = a->grid->myrow;
917     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
918     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
920   }
921   if (cols) {
922     n     = a->locc;
923     nb    = a->nb;
924     isrc  = a->csrc;
925     nproc = a->grid->npcol;
926     iproc = a->grid->mycol;
927     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
935 {
936   PetscErrorCode ierr;
937   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
938   Mat            Bmpi;
939   MPI_Comm       comm;
940   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
941   const PetscInt *ranges;
942   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
943   PetscScalar    *barray;
944 
945   PetscFunctionBegin;
946   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
947 
948   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
949   else {
950     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
951     m = PETSC_DECIDE;
952     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
953     n = PETSC_DECIDE;
954     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
955     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
956     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
957     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
958   }
959 
960   /* create ScaLAPACK descriptor for B (1d block distribution) */
961   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
962   ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
963   lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
964   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
965   PetscCheckScaLapackInfo("descinit",info);
966 
967   /* redistribute matrix */
968   ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
969   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
970   ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
971 
972   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   if (reuse == MAT_INPLACE_MATRIX) {
975     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
976   } else *B = Bmpi;
977   PetscFunctionReturn(0);
978 }
979 
980 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
981 {
982   PetscErrorCode ierr;
983   Mat_ScaLAPACK  *b;
984   Mat            Bmpi;
985   MPI_Comm       comm;
986   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
987   const PetscInt *ranges;
988   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
989   PetscScalar    *aarray;
990   PetscInt       lda;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
994 
995   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
996   else {
997     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
998     m = PETSC_DECIDE;
999     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1000     n = PETSC_DECIDE;
1001     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1002     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1003     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
1004     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1005   }
1006   b = (Mat_ScaLAPACK*)Bmpi->data;
1007 
1008   /* create ScaLAPACK descriptor for A (1d block distribution) */
1009   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
1010   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
1011   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1012   lld = PetscMax(lda,1);  /* local leading dimension */
1013   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1014   PetscCheckScaLapackInfo("descinit",info);
1015 
1016   /* redistribute matrix */
1017   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1018   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1019   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1020 
1021   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1022   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1023   if (reuse == MAT_INPLACE_MATRIX) {
1024     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1025   } else *B = Bmpi;
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1030 {
1031   Mat               mat_scal;
1032   PetscErrorCode    ierr;
1033   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1034   const PetscInt    *cols;
1035   const PetscScalar *vals;
1036 
1037   PetscFunctionBegin;
1038   if (reuse == MAT_REUSE_MATRIX) {
1039     mat_scal = *newmat;
1040     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1041   } else {
1042     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1043     m = PETSC_DECIDE;
1044     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1045     n = PETSC_DECIDE;
1046     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1047     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1048     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1049     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1050   }
1051   for (row=rstart;row<rend;row++) {
1052     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1053     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1054     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1055   }
1056   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1057   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1058 
1059   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1060   else *newmat = mat_scal;
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1065 {
1066   Mat               mat_scal;
1067   PetscErrorCode    ierr;
1068   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1069   const PetscInt    *cols;
1070   const PetscScalar *vals;
1071   PetscScalar       v;
1072 
1073   PetscFunctionBegin;
1074   if (reuse == MAT_REUSE_MATRIX) {
1075     mat_scal = *newmat;
1076     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1077   } else {
1078     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1079     m = PETSC_DECIDE;
1080     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1081     n = PETSC_DECIDE;
1082     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1083     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1084     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1085     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1086   }
1087   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1088   for (row=rstart;row<rend;row++) {
1089     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1090     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1091     for (j=0;j<ncols;j++) { /* lower triangular part */
1092       if (cols[j] == row) continue;
1093       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1094       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1095     }
1096     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1097   }
1098   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1099   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1100   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1101 
1102   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1103   else *newmat = mat_scal;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1108 {
1109   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       sz=0;
1112 
1113   PetscFunctionBegin;
1114   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1115   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1116   if (!a->lld) a->lld = a->locr;
1117 
1118   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1119   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1120   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1121   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1122 
1123   A->preallocated = PETSC_TRUE;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1128 {
1129   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1130   PetscErrorCode     ierr;
1131   Mat_ScaLAPACK_Grid *grid;
1132   PetscBool          flg;
1133   MPI_Comm           icomm;
1134 
1135   PetscFunctionBegin;
1136   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1137   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1138   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1139   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1140   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1141   if (--grid->grid_refct == 0) {
1142     Cblacs_gridexit(grid->ictxt);
1143     Cblacs_gridexit(grid->ictxrow);
1144     Cblacs_gridexit(grid->ictxcol);
1145     ierr = PetscFree(grid);CHKERRQ(ierr);
1146     ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1147   }
1148   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscFree(A->data);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1158 {
1159   PetscErrorCode ierr;
1160   const PetscInt *ranges;
1161   PetscMPIInt    size;
1162   PetscInt       i,n;
1163 
1164   PetscFunctionBegin;
1165   ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
1166   if (size>2) {
1167     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1168     n = ranges[1]-ranges[0];
1169     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1170     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1176 {
1177   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1178   PetscErrorCode ierr;
1179   PetscBLASInt   info=0;
1180 
1181   PetscFunctionBegin;
1182   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1183   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1184 
1185   /* check that the layout is as enforced by MatCreateScaLAPACK */
1186   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1187   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1188 
1189   /* compute local sizes */
1190   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1191   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1192   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1193   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1194   a->lld  = PetscMax(1,a->locr);
1195 
1196   /* allocate local array */
1197   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1198 
1199   /* set up ScaLAPACK descriptor */
1200   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1201   PetscCheckScaLapackInfo("descinit",info);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1206 {
1207   PetscErrorCode ierr;
1208   PetscInt       nstash,reallocs;
1209 
1210   PetscFunctionBegin;
1211   if (A->nooffprocentries) PetscFunctionReturn(0);
1212   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1213   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1214   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1219 {
1220   PetscErrorCode ierr;
1221   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1222   PetscMPIInt    n;
1223   PetscInt       i,flg,*row,*col;
1224   PetscScalar    *val;
1225   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1226 
1227   PetscFunctionBegin;
1228   if (A->nooffprocentries) PetscFunctionReturn(0);
1229   while (1) {
1230     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1231     if (!flg) break;
1232     for (i=0;i<n;i++) {
1233       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1234       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1235       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1236       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1237       switch (A->insertmode) {
1238         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1239         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1240         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1241       }
1242     }
1243   }
1244   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1249 {
1250   PetscErrorCode ierr;
1251   Mat            Adense,As;
1252   MPI_Comm       comm;
1253 
1254   PetscFunctionBegin;
1255   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1256   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1257   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1258   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1259   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1260   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1261   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 /* -------------------------------------------------------------------*/
1266 static struct _MatOps MatOps_Values = {
1267        MatSetValues_ScaLAPACK,
1268        0,
1269        0,
1270        MatMult_ScaLAPACK,
1271 /* 4*/ MatMultAdd_ScaLAPACK,
1272        MatMultTranspose_ScaLAPACK,
1273        MatMultTransposeAdd_ScaLAPACK,
1274        MatSolve_ScaLAPACK,
1275        MatSolveAdd_ScaLAPACK,
1276        0,
1277 /*10*/ 0,
1278        MatLUFactor_ScaLAPACK,
1279        MatCholeskyFactor_ScaLAPACK,
1280        0,
1281        MatTranspose_ScaLAPACK,
1282 /*15*/ MatGetInfo_ScaLAPACK,
1283        0,
1284        MatGetDiagonal_ScaLAPACK,
1285        MatDiagonalScale_ScaLAPACK,
1286        MatNorm_ScaLAPACK,
1287 /*20*/ MatAssemblyBegin_ScaLAPACK,
1288        MatAssemblyEnd_ScaLAPACK,
1289        MatSetOption_ScaLAPACK,
1290        MatZeroEntries_ScaLAPACK,
1291 /*24*/ 0,
1292        MatLUFactorSymbolic_ScaLAPACK,
1293        MatLUFactorNumeric_ScaLAPACK,
1294        MatCholeskyFactorSymbolic_ScaLAPACK,
1295        MatCholeskyFactorNumeric_ScaLAPACK,
1296 /*29*/ MatSetUp_ScaLAPACK,
1297        0,
1298        0,
1299        0,
1300        0,
1301 /*34*/ MatDuplicate_ScaLAPACK,
1302        0,
1303        0,
1304        0,
1305        0,
1306 /*39*/ MatAXPY_ScaLAPACK,
1307        0,
1308        0,
1309        0,
1310        MatCopy_ScaLAPACK,
1311 /*44*/ 0,
1312        MatScale_ScaLAPACK,
1313        MatShift_ScaLAPACK,
1314        0,
1315        0,
1316 /*49*/ 0,
1317        0,
1318        0,
1319        0,
1320        0,
1321 /*54*/ 0,
1322        0,
1323        0,
1324        0,
1325        0,
1326 /*59*/ 0,
1327        MatDestroy_ScaLAPACK,
1328        MatView_ScaLAPACK,
1329        0,
1330        0,
1331 /*64*/ 0,
1332        0,
1333        0,
1334        0,
1335        0,
1336 /*69*/ 0,
1337        0,
1338        MatConvert_ScaLAPACK_Dense,
1339        0,
1340        0,
1341 /*74*/ 0,
1342        0,
1343        0,
1344        0,
1345        0,
1346 /*79*/ 0,
1347        0,
1348        0,
1349        0,
1350        MatLoad_ScaLAPACK,
1351 /*84*/ 0,
1352        0,
1353        0,
1354        0,
1355        0,
1356 /*89*/ 0,
1357        0,
1358        MatMatMultNumeric_ScaLAPACK,
1359        0,
1360        0,
1361 /*94*/ 0,
1362        0,
1363        0,
1364        MatMatTransposeMultNumeric_ScaLAPACK,
1365        0,
1366 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1367        0,
1368        0,
1369        MatConjugate_ScaLAPACK,
1370        0,
1371 /*104*/0,
1372        0,
1373        0,
1374        0,
1375        0,
1376 /*109*/MatMatSolve_ScaLAPACK,
1377        0,
1378        0,
1379        0,
1380        MatMissingDiagonal_ScaLAPACK,
1381 /*114*/0,
1382        0,
1383        0,
1384        0,
1385        0,
1386 /*119*/0,
1387        MatHermitianTranspose_ScaLAPACK,
1388        0,
1389        0,
1390        0,
1391 /*124*/0,
1392        0,
1393        0,
1394        0,
1395        0,
1396 /*129*/0,
1397        0,
1398        0,
1399        0,
1400        0,
1401 /*134*/0,
1402        0,
1403        0,
1404        0,
1405        0,
1406        0,
1407 /*140*/0,
1408        0,
1409        0,
1410        0,
1411        0,
1412 /*145*/0,
1413        0,
1414        0
1415 };
1416 
1417 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1418 {
1419   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1420   PetscInt           size=stash->size,nsends;
1421   PetscErrorCode     ierr;
1422   PetscInt           count,*sindices,**rindices,i,j,l;
1423   PetscScalar        **rvalues,*svalues;
1424   MPI_Comm           comm = stash->comm;
1425   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1426   PetscMPIInt        *sizes,*nlengths,nreceives;
1427   PetscInt           *sp_idx,*sp_idy;
1428   PetscScalar        *sp_val;
1429   PetscMatStashSpace space,space_next;
1430   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1431   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1432 
1433   PetscFunctionBegin;
1434   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1435     InsertMode addv;
1436     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1437     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1438     mat->insertmode = addv; /* in case this processor had no cache */
1439   }
1440 
1441   bs2 = stash->bs*stash->bs;
1442 
1443   /*  first count number of contributors to each processor */
1444   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1445   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1446 
1447   i     = j    = 0;
1448   space = stash->space_head;
1449   while (space) {
1450     space_next = space->next;
1451     for (l=0; l<space->local_used; l++) {
1452       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1453       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1454       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1455       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1456       nlengths[j]++; owner[i] = j;
1457       i++;
1458     }
1459     space = space_next;
1460   }
1461 
1462   /* Now check what procs get messages - and compute nsends. */
1463   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1464   for (i=0, nsends=0; i<size; i++) {
1465     if (nlengths[i]) {
1466       sizes[i] = 1; nsends++;
1467     }
1468   }
1469 
1470   {PetscMPIInt *onodes,*olengths;
1471    /* Determine the number of messages to expect, their lengths, from from-ids */
1472    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1473    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1474    /* since clubbing row,col - lengths are multiplied by 2 */
1475    for (i=0; i<nreceives; i++) olengths[i] *=2;
1476    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1477    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1478    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1479    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1480    ierr = PetscFree(onodes);CHKERRQ(ierr);
1481    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1482 
1483   /* do sends:
1484       1) starts[i] gives the starting index in svalues for stuff going to
1485          the ith processor
1486   */
1487   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1488   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1489   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1490   /* use 2 sends the first with all_a, the next with all_i and all_j */
1491   startv[0] = 0; starti[0] = 0;
1492   for (i=1; i<size; i++) {
1493     startv[i] = startv[i-1] + nlengths[i-1];
1494     starti[i] = starti[i-1] + 2*nlengths[i-1];
1495   }
1496 
1497   i     = 0;
1498   space = stash->space_head;
1499   while (space) {
1500     space_next = space->next;
1501     sp_idx     = space->idx;
1502     sp_idy     = space->idy;
1503     sp_val     = space->val;
1504     for (l=0; l<space->local_used; l++) {
1505       j = owner[i];
1506       if (bs2 == 1) {
1507         svalues[startv[j]] = sp_val[l];
1508       } else {
1509         PetscInt    k;
1510         PetscScalar *buf1,*buf2;
1511         buf1 = svalues+bs2*startv[j];
1512         buf2 = space->val + bs2*l;
1513         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1514       }
1515       sindices[starti[j]]             = sp_idx[l];
1516       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1517       startv[j]++;
1518       starti[j]++;
1519       i++;
1520     }
1521     space = space_next;
1522   }
1523   startv[0] = 0;
1524   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1525 
1526   for (i=0,count=0; i<size; i++) {
1527     if (sizes[i]) {
1528       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1529       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1530     }
1531   }
1532 #if defined(PETSC_USE_INFO)
1533   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1534   for (i=0; i<size; i++) {
1535     if (sizes[i]) {
1536       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1537     }
1538   }
1539 #endif
1540   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1541   ierr = PetscFree(owner);CHKERRQ(ierr);
1542   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1543   ierr = PetscFree(sizes);CHKERRQ(ierr);
1544 
1545   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1546   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1547 
1548   for (i=0; i<nreceives; i++) {
1549     recv_waits[2*i]   = recv_waits1[i];
1550     recv_waits[2*i+1] = recv_waits2[i];
1551   }
1552   stash->recv_waits = recv_waits;
1553 
1554   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1555   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1556 
1557   stash->svalues         = svalues;
1558   stash->sindices        = sindices;
1559   stash->rvalues         = rvalues;
1560   stash->rindices        = rindices;
1561   stash->send_waits      = send_waits;
1562   stash->nsends          = nsends;
1563   stash->nrecvs          = nreceives;
1564   stash->reproduce_count = 0;
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1569 {
1570   PetscErrorCode ierr;
1571   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1572 
1573   PetscFunctionBegin;
1574   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1575   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1576   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1577   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1578   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 /*@
1583    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1584    the ScaLAPACK matrix
1585 
1586    Logically Collective on A
1587 
1588    Input Parameter:
1589 +  A  - a MATSCALAPACK matrix
1590 .  mb - the row block size
1591 -  nb - the column block size
1592 
1593    Level: intermediate
1594 
1595 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1596 @*/
1597 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1598 {
1599   PetscErrorCode ierr;
1600 
1601   PetscFunctionBegin;
1602   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1603   PetscValidLogicalCollectiveInt(A,mb,2);
1604   PetscValidLogicalCollectiveInt(A,nb,3);
1605   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1610 {
1611   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1612 
1613   PetscFunctionBegin;
1614   if (mb) *mb = a->mb;
1615   if (nb) *nb = a->nb;
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 /*@
1620    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1621    the ScaLAPACK matrix
1622 
1623    Not collective
1624 
1625    Input Parameter:
1626 .  A  - a MATSCALAPACK matrix
1627 
1628    Output Parameters:
1629 +  mb - the row block size
1630 -  nb - the column block size
1631 
1632    Level: intermediate
1633 
1634 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1635 @*/
1636 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1637 {
1638   PetscErrorCode ierr;
1639 
1640   PetscFunctionBegin;
1641   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1642   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1647 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1648 
1649 /*MC
1650    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1651 
1652    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1653 
1654    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1655 
1656    Options Database Keys:
1657 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1658 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1659 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1660 
1661    Level: beginner
1662 
1663 .seealso: MATDENSE, MATELEMENTAL
1664 M*/
1665 
1666 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1667 {
1668   Mat_ScaLAPACK      *a;
1669   PetscErrorCode     ierr;
1670   PetscBool          flg,flg1;
1671   Mat_ScaLAPACK_Grid *grid;
1672   MPI_Comm           icomm;
1673   PetscBLASInt       nprow,npcol,myrow,mycol;
1674   PetscInt           optv1,k=2,array[2]={0,0};
1675   PetscMPIInt        size;
1676 
1677   PetscFunctionBegin;
1678   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1679   A->insertmode = NOT_SET_VALUES;
1680 
1681   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1682   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1683   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1684   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1685   A->stash.ScatterDestroy = NULL;
1686 
1687   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1688   A->data = (void*)a;
1689 
1690   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1691   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1692     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1693   }
1694   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1695   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1696   if (!flg) {
1697     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1698 
1699     ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1700     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1701 
1702     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1703     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1704     if (flg1) {
1705       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1706       grid->nprow = optv1;
1707     }
1708     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1709 
1710     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1711     grid->npcol = size/grid->nprow;
1712     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1713     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1714     Cblacs_get(-1,0,&grid->ictxt);
1715     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1716     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1717     grid->grid_refct = 1;
1718     grid->nprow      = nprow;
1719     grid->npcol      = npcol;
1720     grid->myrow      = myrow;
1721     grid->mycol      = mycol;
1722     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1723     Cblacs_get(-1,0,&grid->ictxrow);
1724     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1725     Cblacs_get(-1,0,&grid->ictxcol);
1726     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1727     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1728 
1729   } else grid->grid_refct++;
1730   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1731   a->grid = grid;
1732   a->mb   = DEFAULT_BLOCKSIZE;
1733   a->nb   = DEFAULT_BLOCKSIZE;
1734 
1735   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1736   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1737   if (flg) {
1738     a->mb = array[0];
1739     a->nb = (k>1)? array[1]: a->mb;
1740   }
1741   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1742 
1743   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1744   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1745   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1746   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 /*@C
1751    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1752    (2D block cyclic distribution).
1753 
1754    Collective
1755 
1756    Input Parameters:
1757 +  comm - MPI communicator
1758 .  mb   - row block size (or PETSC_DECIDE to have it set)
1759 .  nb   - column block size (or PETSC_DECIDE to have it set)
1760 .  M    - number of global rows
1761 .  N    - number of global columns
1762 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1763 -  csrc - coordinate of process that owns the first column of the distributed matrix
1764 
1765    Output Parameter:
1766 .  A - the matrix
1767 
1768    Options Database Keys:
1769 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1770 
1771    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1772    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1773    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1774 
1775    Notes:
1776    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1777    is chosen.
1778 
1779    Storage Information:
1780    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1781    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1782    significance and are thus ignored. The block sizes refer to the values
1783    used for the distributed matrix, not the same meaning as in BAIJ.
1784 
1785    Level: intermediate
1786 
1787 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1788 @*/
1789 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1790 {
1791   PetscErrorCode ierr;
1792   Mat_ScaLAPACK  *a;
1793   PetscInt       m,n;
1794 
1795   PetscFunctionBegin;
1796   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1797   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1798   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1799   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1800   m = PETSC_DECIDE;
1801   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1802   n = PETSC_DECIDE;
1803   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1804   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1805   a = (Mat_ScaLAPACK*)(*A)->data;
1806   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1807   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1808   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1809   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1810   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1811   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1812   ierr = MatSetUp(*A);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816