xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision aa406ff9e86cea45c7cc73ee99b64d5fc225173c)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
17   ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
22 {
23   PetscErrorCode    ierr;
24   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
25   PetscBool         iascii;
26   PetscViewerFormat format;
27   Mat               Adense;
28 
29   PetscFunctionBegin;
30   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
31   if (iascii) {
32     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
33     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
34       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
35       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
36       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
37       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
38       PetscFunctionReturn(0);
39     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
40       PetscFunctionReturn(0);
41     }
42   }
43   /* convert to dense format and call MatView() */
44   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
45   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
46   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
51 {
52   PetscErrorCode ierr;
53   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
54   PetscLogDouble isend[2],irecv[2];
55 
56   PetscFunctionBegin;
57   info->block_size = 1.0;
58 
59   isend[0] = a->lld*a->locc;     /* locally allocated */
60   isend[1] = a->locr*a->locc;    /* used submatrix */
61   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
62     info->nz_allocated   = isend[0];
63     info->nz_used        = isend[1];
64   } else if (flag == MAT_GLOBAL_MAX) {
65     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
66     info->nz_allocated   = irecv[0];
67     info->nz_used        = irecv[1];
68   } else if (flag == MAT_GLOBAL_SUM) {
69     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
70     info->nz_allocated   = irecv[0];
71     info->nz_used        = irecv[1];
72   }
73 
74   info->nz_unneeded       = 0;
75   info->assemblies        = A->num_ass;
76   info->mallocs           = 0;
77   info->memory            = ((PetscObject)A)->mem;
78   info->fill_ratio_given  = 0;
79   info->fill_ratio_needed = 0;
80   info->factor_mallocs    = 0;
81   PetscFunctionReturn(0);
82 }
83 
84 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
85 {
86   PetscFunctionBegin;
87   switch (op) {
88     case MAT_NEW_NONZERO_LOCATIONS:
89     case MAT_NEW_NONZERO_LOCATION_ERR:
90     case MAT_NEW_NONZERO_ALLOCATION_ERR:
91     case MAT_SYMMETRIC:
92     case MAT_SORTED_FULL:
93     case MAT_HERMITIAN:
94       break;
95     default:
96       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
102 {
103   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j;
106   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
107 
108   PetscFunctionBegin;
109   for (i=0;i<nr;i++) {
110     if (rows[i] < 0) continue;
111     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
112     for (j=0;j<nc;j++) {
113       if (cols[j] < 0) continue;
114       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
115       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
116       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
117         switch (imode) {
118           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
119           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
120           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
121         }
122       } else {
123         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
124         A->assembled = PETSC_FALSE;
125         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
126       }
127     }
128   }
129   PetscFunctionReturn(0);
130 }
131 
132 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
133 {
134   PetscErrorCode ierr;
135   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
136   PetscScalar    *x2d,*y2d,alpha=1.0;
137   const PetscInt *ranges;
138   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
139 
140   PetscFunctionBegin;
141   if (transpose) {
142 
143     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
144     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
145     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
146     xlld = PetscMax(1,A->rmap->n);
147     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
148     PetscCheckScaLapackInfo("descinit",info);
149     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
150     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
151     ylld = 1;
152     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
153     PetscCheckScaLapackInfo("descinit",info);
154 
155     /* allocate 2d vectors */
156     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
157     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
158     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
159     xlld = PetscMax(1,lszx);
160 
161     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
162     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
163     PetscCheckScaLapackInfo("descinit",info);
164     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
165     PetscCheckScaLapackInfo("descinit",info);
166 
167     /* redistribute x as a column of a 2d matrix */
168     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
169 
170     /* redistribute y as a row of a 2d matrix */
171     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
172 
173     /* call PBLAS subroutine */
174     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
175 
176     /* redistribute y from a row of a 2d matrix */
177     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
178 
179   } else {   /* non-transpose */
180 
181     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
182     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
183     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
184     xlld = 1;
185     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
186     PetscCheckScaLapackInfo("descinit",info);
187     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
188     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
189     ylld = PetscMax(1,A->rmap->n);
190     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
191     PetscCheckScaLapackInfo("descinit",info);
192 
193     /* allocate 2d vectors */
194     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
195     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
196     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
197     ylld = PetscMax(1,lszy);
198 
199     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
200     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
201     PetscCheckScaLapackInfo("descinit",info);
202     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
203     PetscCheckScaLapackInfo("descinit",info);
204 
205     /* redistribute x as a row of a 2d matrix */
206     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
207 
208     /* redistribute y as a column of a 2d matrix */
209     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
210 
211     /* call PBLAS subroutine */
212     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
213 
214     /* redistribute y from a column of a 2d matrix */
215     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
216 
217   }
218   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
219   PetscFunctionReturn(0);
220 }
221 
222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223 {
224   PetscErrorCode    ierr;
225   const PetscScalar *xarray;
226   PetscScalar       *yarray;
227 
228   PetscFunctionBegin;
229   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
230   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
231   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
232   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
233   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238 {
239   PetscErrorCode    ierr;
240   const PetscScalar *xarray;
241   PetscScalar       *yarray;
242 
243   PetscFunctionBegin;
244   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
245   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
246   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
247   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
248   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253 {
254   PetscErrorCode    ierr;
255   const PetscScalar *xarray;
256   PetscScalar       *zarray;
257 
258   PetscFunctionBegin;
259   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
260   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
262   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
263   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
264   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269 {
270   PetscErrorCode    ierr;
271   const PetscScalar *xarray;
272   PetscScalar       *zarray;
273 
274   PetscFunctionBegin;
275   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
276   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
277   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
278   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
279   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
280   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
285 {
286   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
287   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
288   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
289   PetscScalar   sone=1.0,zero=0.0;
290   PetscBLASInt  one=1;
291 
292   PetscFunctionBegin;
293   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
294   C->assembled = PETSC_TRUE;
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
299 {
300   PetscErrorCode ierr;
301 
302   PetscFunctionBegin;
303   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
304   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
305   ierr = MatSetUp(C);CHKERRQ(ierr);
306   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307   PetscFunctionReturn(0);
308 }
309 
310 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311 {
312   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315   PetscScalar   sone=1.0,zero=0.0;
316   PetscBLASInt  one=1;
317 
318   PetscFunctionBegin;
319   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
320   C->assembled = PETSC_TRUE;
321   PetscFunctionReturn(0);
322 }
323 
324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325 {
326   PetscErrorCode ierr;
327 
328   PetscFunctionBegin;
329   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
330   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
331   ierr = MatSetUp(C);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 /* --------------------------------------- */
336 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
337 {
338   PetscFunctionBegin;
339   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
340   C->ops->productsymbolic = MatProductSymbolic_AB;
341   PetscFunctionReturn(0);
342 }
343 
344 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
345 {
346   PetscFunctionBegin;
347   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348   C->ops->productsymbolic          = MatProductSymbolic_ABt;
349   PetscFunctionReturn(0);
350 }
351 
352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353 {
354   PetscErrorCode ierr;
355   Mat_Product    *product = C->product;
356 
357   PetscFunctionBegin;
358   switch (product->type) {
359     case MATPRODUCT_AB:
360       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
361       break;
362     case MATPRODUCT_ABt:
363       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
364       break;
365     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366   }
367   PetscFunctionReturn(0);
368 }
369 /* --------------------------------------- */
370 
371 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
372 {
373   PetscErrorCode    ierr;
374   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
375   PetscScalar       *darray,*d2d,v;
376   const PetscInt    *ranges;
377   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
378 
379   PetscFunctionBegin;
380   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
381 
382   if (A->rmap->N<=A->cmap->N) {   /* row version */
383 
384     /* create ScaLAPACK descriptor for vector (1d block distribution) */
385     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
386     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
387     dlld = PetscMax(1,A->rmap->n);
388     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
389     PetscCheckScaLapackInfo("descinit",info);
390 
391     /* allocate 2d vector */
392     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
393     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
394     dlld = PetscMax(1,lszd);
395 
396     /* create ScaLAPACK descriptor for vector (2d block distribution) */
397     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
398     PetscCheckScaLapackInfo("descinit",info);
399 
400     /* collect diagonal */
401     for (j=1;j<=a->M;j++) {
402       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
403       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
404     }
405 
406     /* redistribute d from a column of a 2d matrix */
407     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
408     ierr = PetscFree(d2d);CHKERRQ(ierr);
409 
410   } else {   /* column version */
411 
412     /* create ScaLAPACK descriptor for vector (1d block distribution) */
413     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
414     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
415     dlld = 1;
416     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
417     PetscCheckScaLapackInfo("descinit",info);
418 
419     /* allocate 2d vector */
420     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
421     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
422 
423     /* create ScaLAPACK descriptor for vector (2d block distribution) */
424     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
425     PetscCheckScaLapackInfo("descinit",info);
426 
427     /* collect diagonal */
428     for (j=1;j<=a->N;j++) {
429       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
430       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
431     }
432 
433     /* redistribute d from a row of a 2d matrix */
434     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
435     ierr = PetscFree(d2d);CHKERRQ(ierr);
436   }
437 
438   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
439   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
440   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
445 {
446   PetscErrorCode    ierr;
447   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
448   const PetscScalar *d;
449   const PetscInt    *ranges;
450   PetscScalar       *d2d;
451   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
452 
453   PetscFunctionBegin;
454   if (R) {
455     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
456     /* create ScaLAPACK descriptor for vector (1d block distribution) */
457     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
458     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
459     dlld = 1;
460     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
461     PetscCheckScaLapackInfo("descinit",info);
462 
463     /* allocate 2d vector */
464     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
465     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
466 
467     /* create ScaLAPACK descriptor for vector (2d block distribution) */
468     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
469     PetscCheckScaLapackInfo("descinit",info);
470 
471     /* redistribute d to a row of a 2d matrix */
472     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
473 
474     /* broadcast along process columns */
475     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
476     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
477 
478     /* local scaling */
479     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
480 
481     ierr = PetscFree(d2d);CHKERRQ(ierr);
482     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
483   }
484   if (L) {
485     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
486     /* create ScaLAPACK descriptor for vector (1d block distribution) */
487     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
488     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
489     dlld = PetscMax(1,A->rmap->n);
490     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
491     PetscCheckScaLapackInfo("descinit",info);
492 
493     /* allocate 2d vector */
494     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
495     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
496     dlld = PetscMax(1,lszd);
497 
498     /* create ScaLAPACK descriptor for vector (2d block distribution) */
499     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
500     PetscCheckScaLapackInfo("descinit",info);
501 
502     /* redistribute d to a column of a 2d matrix */
503     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
504 
505     /* broadcast along process rows */
506     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
507     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
508 
509     /* local scaling */
510     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
511 
512     ierr = PetscFree(d2d);CHKERRQ(ierr);
513     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
519 {
520   PetscFunctionBegin;
521   *missing = PETSC_FALSE;
522   PetscFunctionReturn(0);
523 }
524 
525 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
526 {
527   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
528   PetscBLASInt  n,one=1;
529 
530   PetscFunctionBegin;
531   n = x->lld*x->locc;
532   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
533   PetscFunctionReturn(0);
534 }
535 
536 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
537 {
538   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539   PetscBLASInt  i,n;
540   PetscScalar   v;
541 
542   PetscFunctionBegin;
543   n = PetscMin(x->M,x->N);
544   for (i=1;i<=n;i++) {
545     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
546     v += alpha;
547     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
553 {
554   PetscErrorCode ierr;
555   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
556   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
557   PetscBLASInt   one=1;
558   PetscScalar    beta=1.0;
559 
560   PetscFunctionBegin;
561   MatScaLAPACKCheckDistribution(Y,1,X,3);
562   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
563   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568 {
569   PetscErrorCode ierr;
570   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
571   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
572 
573   PetscFunctionBegin;
574   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
575   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
580 {
581   Mat            Bs;
582   MPI_Comm       comm;
583   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
584   PetscErrorCode ierr;
585 
586   PetscFunctionBegin;
587   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
588   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
589   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
590   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
591   b = (Mat_ScaLAPACK*)Bs->data;
592   b->M    = a->M;
593   b->N    = a->N;
594   b->mb   = a->mb;
595   b->nb   = a->nb;
596   b->rsrc = a->rsrc;
597   b->csrc = a->csrc;
598   ierr = MatSetUp(Bs);CHKERRQ(ierr);
599   *B = Bs;
600   if (op == MAT_COPY_VALUES) {
601     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
602   }
603   Bs->assembled = PETSC_TRUE;
604   PetscFunctionReturn(0);
605 }
606 
607 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
608 {
609   PetscErrorCode ierr;
610   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
611   Mat            Bs = *B;
612   PetscBLASInt   one=1;
613   PetscScalar    sone=1.0,zero=0.0;
614 #if defined(PETSC_USE_COMPLEX)
615   PetscInt       i,j;
616 #endif
617 
618   PetscFunctionBegin;
619   if (reuse == MAT_INITIAL_MATRIX) {
620     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
621     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
622     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
623     ierr = MatSetUp(Bs);CHKERRQ(ierr);
624     *B = Bs;
625   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
626   b = (Mat_ScaLAPACK*)Bs->data;
627   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
628 #if defined(PETSC_USE_COMPLEX)
629   /* undo conjugation */
630   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
631 #endif
632   Bs->assembled = PETSC_TRUE;
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
637 {
638   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639   PetscInt      i,j;
640 
641   PetscFunctionBegin;
642   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
643   PetscFunctionReturn(0);
644 }
645 
646 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
647 {
648   PetscErrorCode ierr;
649   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
650   Mat            Bs = *B;
651   PetscBLASInt   one=1;
652   PetscScalar    sone=1.0,zero=0.0;
653 
654   PetscFunctionBegin;
655   if (reuse == MAT_INITIAL_MATRIX) {
656     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
657     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
658     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
659     ierr = MatSetUp(Bs);CHKERRQ(ierr);
660     *B = Bs;
661   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
662   b = (Mat_ScaLAPACK*)Bs->data;
663   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
664   Bs->assembled = PETSC_TRUE;
665   PetscFunctionReturn(0);
666 }
667 
668 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
669 {
670   PetscErrorCode ierr;
671   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
672   PetscScalar    *x,*x2d;
673   const PetscInt *ranges;
674   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
675 
676   PetscFunctionBegin;
677   ierr = VecCopy(B,X);CHKERRQ(ierr);
678   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
679 
680   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
681   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
682   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
683   xlld = PetscMax(1,A->rmap->n);
684   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
685   PetscCheckScaLapackInfo("descinit",info);
686 
687   /* allocate 2d vector */
688   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
689   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
690   xlld = PetscMax(1,lszx);
691 
692   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
693   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
694   PetscCheckScaLapackInfo("descinit",info);
695 
696   /* redistribute x as a column of a 2d matrix */
697   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
698 
699   /* call ScaLAPACK subroutine */
700   switch (A->factortype) {
701     case MAT_FACTOR_LU:
702       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
703       PetscCheckScaLapackInfo("getrs",info);
704       break;
705     case MAT_FACTOR_CHOLESKY:
706       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
707       PetscCheckScaLapackInfo("potrs",info);
708       break;
709     default:
710       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
711       break;
712   }
713 
714   /* redistribute x from a column of a 2d matrix */
715   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
716 
717   ierr = PetscFree(x2d);CHKERRQ(ierr);
718   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
723 {
724   PetscErrorCode ierr;
725 
726   PetscFunctionBegin;
727   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
728   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
733 {
734   PetscErrorCode ierr;
735   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
736   PetscBool      flg1,flg2;
737   PetscBLASInt   one=1,info;
738 
739   PetscFunctionBegin;
740   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
742   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
743   MatScaLAPACKCheckDistribution(B,1,X,2);
744   b = (Mat_ScaLAPACK*)B->data;
745   x = (Mat_ScaLAPACK*)X->data;
746   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
747 
748   switch (A->factortype) {
749     case MAT_FACTOR_LU:
750       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
751       PetscCheckScaLapackInfo("getrs",info);
752       break;
753     case MAT_FACTOR_CHOLESKY:
754       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
755       PetscCheckScaLapackInfo("potrs",info);
756       break;
757     default:
758       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
759       break;
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
765 {
766   PetscErrorCode ierr;
767   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
768   PetscBLASInt   one=1,info;
769 
770   PetscFunctionBegin;
771   if (!a->pivots) {
772     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
773     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
774   }
775   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
776   PetscCheckScaLapackInfo("getrf",info);
777   A->factortype = MAT_FACTOR_LU;
778   A->assembled  = PETSC_TRUE;
779 
780   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
781   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 
785 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
786 {
787   PetscErrorCode ierr;
788 
789   PetscFunctionBegin;
790   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
791   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
796 {
797   PetscFunctionBegin;
798   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
799   PetscFunctionReturn(0);
800 }
801 
802 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
803 {
804   PetscErrorCode ierr;
805   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
806   PetscBLASInt   one=1,info;
807 
808   PetscFunctionBegin;
809   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
810   PetscCheckScaLapackInfo("potrf",info);
811   A->factortype = MAT_FACTOR_CHOLESKY;
812   A->assembled  = PETSC_TRUE;
813 
814   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
815   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
820 {
821   PetscErrorCode ierr;
822 
823   PetscFunctionBegin;
824   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
825   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
830 {
831   PetscFunctionBegin;
832   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
833   PetscFunctionReturn(0);
834 }
835 
836 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
837 {
838   PetscFunctionBegin;
839   *type = MATSOLVERSCALAPACK;
840   PetscFunctionReturn(0);
841 }
842 
843 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
844 {
845   Mat            B;
846   PetscErrorCode ierr;
847 
848   PetscFunctionBegin;
849   /* Create the factorization matrix */
850   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
851   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
852   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
853   ierr = MatSetUp(B);CHKERRQ(ierr);
854   B->factortype = ftype;
855   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
856   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
857 
858   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
859   *F = B;
860   PetscFunctionReturn(0);
861 }
862 
863 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
864 {
865   PetscErrorCode ierr;
866 
867   PetscFunctionBegin;
868   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
869   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
874 {
875   PetscErrorCode ierr;
876   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
877   PetscBLASInt   one=1,lwork=0;
878   const char     *ntype;
879   PetscScalar    *work=NULL,dummy;
880 
881   PetscFunctionBegin;
882   switch (type){
883     case NORM_1:
884       ntype = "1";
885       lwork = PetscMax(a->locr,a->locc);
886       break;
887     case NORM_FROBENIUS:
888       ntype = "F";
889       work  = &dummy;
890       break;
891     case NORM_INFINITY:
892       ntype = "I";
893       lwork = PetscMax(a->locr,a->locc);
894       break;
895     default:
896       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
897   }
898   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
899   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
900   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
901   PetscFunctionReturn(0);
902 }
903 
904 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
905 {
906   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907   PetscErrorCode ierr;
908 
909   PetscFunctionBegin;
910   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
913 
914 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
915 {
916   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
917   PetscErrorCode ierr;
918   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
919 
920   PetscFunctionBegin;
921   if (rows) {
922     n     = a->locr;
923     nb    = a->mb;
924     isrc  = a->rsrc;
925     nproc = a->grid->nprow;
926     iproc = a->grid->myrow;
927     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
930   }
931   if (cols) {
932     n     = a->locc;
933     nb    = a->nb;
934     isrc  = a->csrc;
935     nproc = a->grid->npcol;
936     iproc = a->grid->mycol;
937     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
938     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
939     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
940   }
941   PetscFunctionReturn(0);
942 }
943 
944 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
945 {
946   PetscErrorCode ierr;
947   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
948   Mat            Bmpi;
949   MPI_Comm       comm;
950   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
951   const PetscInt *ranges,*branges,*cwork;
952   const PetscScalar *vwork;
953   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
954   PetscScalar    *barray;
955   PetscBool      differ=PETSC_FALSE;
956   PetscMPIInt    size;
957 
958   PetscFunctionBegin;
959   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
960   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
961 
962   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
963     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
964     ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
965     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
966   }
967 
968   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
969     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
970     m = PETSC_DECIDE;
971     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
972     n = PETSC_DECIDE;
973     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
974     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
975     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
976     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
977 
978     /* create ScaLAPACK descriptor for B (1d block distribution) */
979     ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
980     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
981     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
982     PetscCheckScaLapackInfo("descinit",info);
983 
984     /* redistribute matrix */
985     ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
986     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
987     ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
988     ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
989     ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990 
991     /* transfer rows of auxiliary matrix to the final matrix B */
992     ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
993     for (i=rstart;i<rend;i++) {
994       ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
995       ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
996       ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
997     }
998     ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
999     ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1000     ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);
1001 
1002   } else {  /* normal cases */
1003 
1004     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1005     else {
1006       ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
1007       m = PETSC_DECIDE;
1008       ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1009       n = PETSC_DECIDE;
1010       ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1011       ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1012       ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
1013       ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1014     }
1015 
1016     /* create ScaLAPACK descriptor for B (1d block distribution) */
1017     ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
1018     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1019     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
1020     PetscCheckScaLapackInfo("descinit",info);
1021 
1022     /* redistribute matrix */
1023     ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
1024     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1025     ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
1026 
1027     ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1028     ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029     if (reuse == MAT_INPLACE_MATRIX) {
1030       ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1031     } else *B = Bmpi;
1032   }
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1037 {
1038   PetscErrorCode ierr;
1039   Mat_ScaLAPACK  *b;
1040   Mat            Bmpi;
1041   MPI_Comm       comm;
1042   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1043   const PetscInt *ranges;
1044   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1045   PetscScalar    *aarray;
1046   PetscInt       lda;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1050 
1051   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1052   else {
1053     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
1054     m = PETSC_DECIDE;
1055     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1056     n = PETSC_DECIDE;
1057     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1058     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1059     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
1060     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1061   }
1062   b = (Mat_ScaLAPACK*)Bmpi->data;
1063 
1064   /* create ScaLAPACK descriptor for A (1d block distribution) */
1065   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
1066   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
1067   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1068   lld = PetscMax(lda,1);  /* local leading dimension */
1069   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1070   PetscCheckScaLapackInfo("descinit",info);
1071 
1072   /* redistribute matrix */
1073   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1074   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1075   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1076 
1077   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1078   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1079   if (reuse == MAT_INPLACE_MATRIX) {
1080     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1081   } else *B = Bmpi;
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1086 {
1087   Mat               mat_scal;
1088   PetscErrorCode    ierr;
1089   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1090   const PetscInt    *cols;
1091   const PetscScalar *vals;
1092 
1093   PetscFunctionBegin;
1094   if (reuse == MAT_REUSE_MATRIX) {
1095     mat_scal = *newmat;
1096     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1097   } else {
1098     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1099     m = PETSC_DECIDE;
1100     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1101     n = PETSC_DECIDE;
1102     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1103     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1104     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1105     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1106   }
1107   for (row=rstart;row<rend;row++) {
1108     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1109     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1110     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1111   }
1112   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1113   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1114 
1115   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1116   else *newmat = mat_scal;
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1121 {
1122   Mat               mat_scal;
1123   PetscErrorCode    ierr;
1124   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1125   const PetscInt    *cols;
1126   const PetscScalar *vals;
1127   PetscScalar       v;
1128 
1129   PetscFunctionBegin;
1130   if (reuse == MAT_REUSE_MATRIX) {
1131     mat_scal = *newmat;
1132     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1133   } else {
1134     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1135     m = PETSC_DECIDE;
1136     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1137     n = PETSC_DECIDE;
1138     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1139     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1140     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1141     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1142   }
1143   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1144   for (row=rstart;row<rend;row++) {
1145     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1146     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1147     for (j=0;j<ncols;j++) { /* lower triangular part */
1148       if (cols[j] == row) continue;
1149       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1150       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1151     }
1152     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1153   }
1154   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1155   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1156   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1157 
1158   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1159   else *newmat = mat_scal;
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1164 {
1165   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1166   PetscErrorCode ierr;
1167   PetscInt       sz=0;
1168 
1169   PetscFunctionBegin;
1170   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1171   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1172   if (!a->lld) a->lld = a->locr;
1173 
1174   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1175   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1176   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1177   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1178 
1179   A->preallocated = PETSC_TRUE;
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1184 {
1185   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1186   PetscErrorCode     ierr;
1187   Mat_ScaLAPACK_Grid *grid;
1188   PetscBool          flg;
1189   MPI_Comm           icomm;
1190 
1191   PetscFunctionBegin;
1192   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1193   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1194   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1195   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1196   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1197   if (--grid->grid_refct == 0) {
1198     Cblacs_gridexit(grid->ictxt);
1199     Cblacs_gridexit(grid->ictxrow);
1200     Cblacs_gridexit(grid->ictxcol);
1201     ierr = PetscFree(grid);CHKERRQ(ierr);
1202     ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1203   }
1204   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1205   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscFree(A->data);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1214 {
1215   PetscErrorCode ierr;
1216   const PetscInt *ranges;
1217   PetscMPIInt    size;
1218   PetscInt       i,n;
1219 
1220   PetscFunctionBegin;
1221   ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
1222   if (size>2) {
1223     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1224     n = ranges[1]-ranges[0];
1225     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1226     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1227   }
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1232 {
1233   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1234   PetscErrorCode ierr;
1235   PetscBLASInt   info=0;
1236 
1237   PetscFunctionBegin;
1238   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1239   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1240 
1241   /* check that the layout is as enforced by MatCreateScaLAPACK */
1242   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1243   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1244 
1245   /* compute local sizes */
1246   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1247   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1248   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1249   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1250   a->lld  = PetscMax(1,a->locr);
1251 
1252   /* allocate local array */
1253   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1254 
1255   /* set up ScaLAPACK descriptor */
1256   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1257   PetscCheckScaLapackInfo("descinit",info);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1262 {
1263   PetscErrorCode ierr;
1264   PetscInt       nstash,reallocs;
1265 
1266   PetscFunctionBegin;
1267   if (A->nooffprocentries) PetscFunctionReturn(0);
1268   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1269   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1270   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1275 {
1276   PetscErrorCode ierr;
1277   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1278   PetscMPIInt    n;
1279   PetscInt       i,flg,*row,*col;
1280   PetscScalar    *val;
1281   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1282 
1283   PetscFunctionBegin;
1284   if (A->nooffprocentries) PetscFunctionReturn(0);
1285   while (1) {
1286     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1287     if (!flg) break;
1288     for (i=0;i<n;i++) {
1289       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1290       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1291       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1292       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1293       switch (A->insertmode) {
1294         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1295         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1296         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1297       }
1298     }
1299   }
1300   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1305 {
1306   PetscErrorCode ierr;
1307   Mat            Adense,As;
1308   MPI_Comm       comm;
1309 
1310   PetscFunctionBegin;
1311   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1312   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1313   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1314   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1315   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1316   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1317   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /* -------------------------------------------------------------------*/
1322 static struct _MatOps MatOps_Values = {
1323        MatSetValues_ScaLAPACK,
1324        0,
1325        0,
1326        MatMult_ScaLAPACK,
1327 /* 4*/ MatMultAdd_ScaLAPACK,
1328        MatMultTranspose_ScaLAPACK,
1329        MatMultTransposeAdd_ScaLAPACK,
1330        MatSolve_ScaLAPACK,
1331        MatSolveAdd_ScaLAPACK,
1332        0,
1333 /*10*/ 0,
1334        MatLUFactor_ScaLAPACK,
1335        MatCholeskyFactor_ScaLAPACK,
1336        0,
1337        MatTranspose_ScaLAPACK,
1338 /*15*/ MatGetInfo_ScaLAPACK,
1339        0,
1340        MatGetDiagonal_ScaLAPACK,
1341        MatDiagonalScale_ScaLAPACK,
1342        MatNorm_ScaLAPACK,
1343 /*20*/ MatAssemblyBegin_ScaLAPACK,
1344        MatAssemblyEnd_ScaLAPACK,
1345        MatSetOption_ScaLAPACK,
1346        MatZeroEntries_ScaLAPACK,
1347 /*24*/ 0,
1348        MatLUFactorSymbolic_ScaLAPACK,
1349        MatLUFactorNumeric_ScaLAPACK,
1350        MatCholeskyFactorSymbolic_ScaLAPACK,
1351        MatCholeskyFactorNumeric_ScaLAPACK,
1352 /*29*/ MatSetUp_ScaLAPACK,
1353        0,
1354        0,
1355        0,
1356        0,
1357 /*34*/ MatDuplicate_ScaLAPACK,
1358        0,
1359        0,
1360        0,
1361        0,
1362 /*39*/ MatAXPY_ScaLAPACK,
1363        0,
1364        0,
1365        0,
1366        MatCopy_ScaLAPACK,
1367 /*44*/ 0,
1368        MatScale_ScaLAPACK,
1369        MatShift_ScaLAPACK,
1370        0,
1371        0,
1372 /*49*/ 0,
1373        0,
1374        0,
1375        0,
1376        0,
1377 /*54*/ 0,
1378        0,
1379        0,
1380        0,
1381        0,
1382 /*59*/ 0,
1383        MatDestroy_ScaLAPACK,
1384        MatView_ScaLAPACK,
1385        0,
1386        0,
1387 /*64*/ 0,
1388        0,
1389        0,
1390        0,
1391        0,
1392 /*69*/ 0,
1393        0,
1394        MatConvert_ScaLAPACK_Dense,
1395        0,
1396        0,
1397 /*74*/ 0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*79*/ 0,
1403        0,
1404        0,
1405        0,
1406        MatLoad_ScaLAPACK,
1407 /*84*/ 0,
1408        0,
1409        0,
1410        0,
1411        0,
1412 /*89*/ 0,
1413        0,
1414        MatMatMultNumeric_ScaLAPACK,
1415        0,
1416        0,
1417 /*94*/ 0,
1418        0,
1419        0,
1420        MatMatTransposeMultNumeric_ScaLAPACK,
1421        0,
1422 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1423        0,
1424        0,
1425        MatConjugate_ScaLAPACK,
1426        0,
1427 /*104*/0,
1428        0,
1429        0,
1430        0,
1431        0,
1432 /*109*/MatMatSolve_ScaLAPACK,
1433        0,
1434        0,
1435        0,
1436        MatMissingDiagonal_ScaLAPACK,
1437 /*114*/0,
1438        0,
1439        0,
1440        0,
1441        0,
1442 /*119*/0,
1443        MatHermitianTranspose_ScaLAPACK,
1444        0,
1445        0,
1446        0,
1447 /*124*/0,
1448        0,
1449        0,
1450        0,
1451        0,
1452 /*129*/0,
1453        0,
1454        0,
1455        0,
1456        0,
1457 /*134*/0,
1458        0,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*140*/0,
1464        0,
1465        0,
1466        0,
1467        0,
1468 /*145*/0,
1469        0,
1470        0
1471 };
1472 
1473 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1474 {
1475   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1476   PetscInt           size=stash->size,nsends;
1477   PetscErrorCode     ierr;
1478   PetscInt           count,*sindices,**rindices,i,j,l;
1479   PetscScalar        **rvalues,*svalues;
1480   MPI_Comm           comm = stash->comm;
1481   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1482   PetscMPIInt        *sizes,*nlengths,nreceives;
1483   PetscInt           *sp_idx,*sp_idy;
1484   PetscScalar        *sp_val;
1485   PetscMatStashSpace space,space_next;
1486   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1487   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1488 
1489   PetscFunctionBegin;
1490   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1491     InsertMode addv;
1492     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1493     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1494     mat->insertmode = addv; /* in case this processor had no cache */
1495   }
1496 
1497   bs2 = stash->bs*stash->bs;
1498 
1499   /*  first count number of contributors to each processor */
1500   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1501   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1502 
1503   i     = j    = 0;
1504   space = stash->space_head;
1505   while (space) {
1506     space_next = space->next;
1507     for (l=0; l<space->local_used; l++) {
1508       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1509       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1510       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1511       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1512       nlengths[j]++; owner[i] = j;
1513       i++;
1514     }
1515     space = space_next;
1516   }
1517 
1518   /* Now check what procs get messages - and compute nsends. */
1519   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1520   for (i=0, nsends=0; i<size; i++) {
1521     if (nlengths[i]) {
1522       sizes[i] = 1; nsends++;
1523     }
1524   }
1525 
1526   {PetscMPIInt *onodes,*olengths;
1527    /* Determine the number of messages to expect, their lengths, from from-ids */
1528    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1529    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1530    /* since clubbing row,col - lengths are multiplied by 2 */
1531    for (i=0; i<nreceives; i++) olengths[i] *=2;
1532    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1533    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1534    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1535    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1536    ierr = PetscFree(onodes);CHKERRQ(ierr);
1537    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1538 
1539   /* do sends:
1540       1) starts[i] gives the starting index in svalues for stuff going to
1541          the ith processor
1542   */
1543   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1544   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1545   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1546   /* use 2 sends the first with all_a, the next with all_i and all_j */
1547   startv[0] = 0; starti[0] = 0;
1548   for (i=1; i<size; i++) {
1549     startv[i] = startv[i-1] + nlengths[i-1];
1550     starti[i] = starti[i-1] + 2*nlengths[i-1];
1551   }
1552 
1553   i     = 0;
1554   space = stash->space_head;
1555   while (space) {
1556     space_next = space->next;
1557     sp_idx     = space->idx;
1558     sp_idy     = space->idy;
1559     sp_val     = space->val;
1560     for (l=0; l<space->local_used; l++) {
1561       j = owner[i];
1562       if (bs2 == 1) {
1563         svalues[startv[j]] = sp_val[l];
1564       } else {
1565         PetscInt    k;
1566         PetscScalar *buf1,*buf2;
1567         buf1 = svalues+bs2*startv[j];
1568         buf2 = space->val + bs2*l;
1569         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1570       }
1571       sindices[starti[j]]             = sp_idx[l];
1572       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1573       startv[j]++;
1574       starti[j]++;
1575       i++;
1576     }
1577     space = space_next;
1578   }
1579   startv[0] = 0;
1580   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1581 
1582   for (i=0,count=0; i<size; i++) {
1583     if (sizes[i]) {
1584       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1585       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1586     }
1587   }
1588 #if defined(PETSC_USE_INFO)
1589   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1590   for (i=0; i<size; i++) {
1591     if (sizes[i]) {
1592       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1593     }
1594   }
1595 #endif
1596   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1597   ierr = PetscFree(owner);CHKERRQ(ierr);
1598   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1599   ierr = PetscFree(sizes);CHKERRQ(ierr);
1600 
1601   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1602   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1603 
1604   for (i=0; i<nreceives; i++) {
1605     recv_waits[2*i]   = recv_waits1[i];
1606     recv_waits[2*i+1] = recv_waits2[i];
1607   }
1608   stash->recv_waits = recv_waits;
1609 
1610   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1611   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1612 
1613   stash->svalues         = svalues;
1614   stash->sindices        = sindices;
1615   stash->rvalues         = rvalues;
1616   stash->rindices        = rindices;
1617   stash->send_waits      = send_waits;
1618   stash->nsends          = nsends;
1619   stash->nrecvs          = nreceives;
1620   stash->reproduce_count = 0;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1625 {
1626   PetscErrorCode ierr;
1627   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1628 
1629   PetscFunctionBegin;
1630   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1631   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1632   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1633   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1634   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 /*@
1639    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1640    the ScaLAPACK matrix
1641 
1642    Logically Collective on A
1643 
1644    Input Parameter:
1645 +  A  - a MATSCALAPACK matrix
1646 .  mb - the row block size
1647 -  nb - the column block size
1648 
1649    Level: intermediate
1650 
1651 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1652 @*/
1653 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1654 {
1655   PetscErrorCode ierr;
1656 
1657   PetscFunctionBegin;
1658   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1659   PetscValidLogicalCollectiveInt(A,mb,2);
1660   PetscValidLogicalCollectiveInt(A,nb,3);
1661   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1666 {
1667   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1668 
1669   PetscFunctionBegin;
1670   if (mb) *mb = a->mb;
1671   if (nb) *nb = a->nb;
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 /*@
1676    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1677    the ScaLAPACK matrix
1678 
1679    Not collective
1680 
1681    Input Parameter:
1682 .  A  - a MATSCALAPACK matrix
1683 
1684    Output Parameters:
1685 +  mb - the row block size
1686 -  nb - the column block size
1687 
1688    Level: intermediate
1689 
1690 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1691 @*/
1692 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1693 {
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1698   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1703 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1704 
1705 /*MC
1706    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1707 
1708    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1709 
1710    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1711 
1712    Options Database Keys:
1713 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1714 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1715 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1716 
1717    Level: beginner
1718 
1719 .seealso: MATDENSE, MATELEMENTAL
1720 M*/
1721 
1722 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1723 {
1724   Mat_ScaLAPACK      *a;
1725   PetscErrorCode     ierr;
1726   PetscBool          flg,flg1;
1727   Mat_ScaLAPACK_Grid *grid;
1728   MPI_Comm           icomm;
1729   PetscBLASInt       nprow,npcol,myrow,mycol;
1730   PetscInt           optv1,k=2,array[2]={0,0};
1731   PetscMPIInt        size;
1732 
1733   PetscFunctionBegin;
1734   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1735   A->insertmode = NOT_SET_VALUES;
1736 
1737   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1738   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1739   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1740   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1741   A->stash.ScatterDestroy = NULL;
1742 
1743   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1744   A->data = (void*)a;
1745 
1746   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1747   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1748     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1749     ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
1750   }
1751   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1752   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1753   if (!flg) {
1754     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1755 
1756     ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1757     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1758 
1759     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1760     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1761     if (flg1) {
1762       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1763       grid->nprow = optv1;
1764     }
1765     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1766 
1767     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1768     grid->npcol = size/grid->nprow;
1769     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1770     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1771     grid->ictxt = Csys2blacs_handle(icomm);
1772     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1773     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1774     grid->grid_refct = 1;
1775     grid->nprow      = nprow;
1776     grid->npcol      = npcol;
1777     grid->myrow      = myrow;
1778     grid->mycol      = mycol;
1779     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1780     grid->ictxrow = Csys2blacs_handle(icomm);
1781     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1782     grid->ictxcol = Csys2blacs_handle(icomm);
1783     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1784     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1785 
1786   } else grid->grid_refct++;
1787   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1788   a->grid = grid;
1789   a->mb   = DEFAULT_BLOCKSIZE;
1790   a->nb   = DEFAULT_BLOCKSIZE;
1791 
1792   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1793   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1794   if (flg) {
1795     a->mb = array[0];
1796     a->nb = (k>1)? array[1]: a->mb;
1797   }
1798   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1799 
1800   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1803   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 /*@C
1808    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1809    (2D block cyclic distribution).
1810 
1811    Collective
1812 
1813    Input Parameters:
1814 +  comm - MPI communicator
1815 .  mb   - row block size (or PETSC_DECIDE to have it set)
1816 .  nb   - column block size (or PETSC_DECIDE to have it set)
1817 .  M    - number of global rows
1818 .  N    - number of global columns
1819 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1820 -  csrc - coordinate of process that owns the first column of the distributed matrix
1821 
1822    Output Parameter:
1823 .  A - the matrix
1824 
1825    Options Database Keys:
1826 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1827 
1828    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1829    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1830    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1831 
1832    Notes:
1833    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1834    is chosen.
1835 
1836    Storage Information:
1837    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1838    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1839    significance and are thus ignored. The block sizes refer to the values
1840    used for the distributed matrix, not the same meaning as in BAIJ.
1841 
1842    Level: intermediate
1843 
1844 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1845 @*/
1846 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1847 {
1848   PetscErrorCode ierr;
1849   Mat_ScaLAPACK  *a;
1850   PetscInt       m,n;
1851 
1852   PetscFunctionBegin;
1853   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1854   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1855   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1856   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1857   m = PETSC_DECIDE;
1858   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1859   n = PETSC_DECIDE;
1860   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1861   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1862   a = (Mat_ScaLAPACK*)(*A)->data;
1863   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1864   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1865   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1866   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1867   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1868   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1869   ierr = MatSetUp(*A);CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873