xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 66e17bc34c6358e26731cd86c8afc040b40d70c0)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
17   ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
22 {
23   PetscErrorCode    ierr;
24   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
25   PetscBool         iascii;
26   PetscViewerFormat format;
27   Mat               Adense;
28 
29   PetscFunctionBegin;
30   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
31   if (iascii) {
32     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
33     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
34       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
35       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
36       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
37       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
38       PetscFunctionReturn(0);
39     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
40       PetscFunctionReturn(0);
41     }
42   }
43   /* convert to dense format and call MatView() */
44   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
45   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
46   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
51 {
52   PetscErrorCode ierr;
53   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
54   PetscLogDouble isend[2],irecv[2];
55 
56   PetscFunctionBegin;
57   info->block_size = 1.0;
58 
59   isend[0] = a->lld*a->locc;     /* locally allocated */
60   isend[1] = a->locr*a->locc;    /* used submatrix */
61   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
62     info->nz_allocated   = isend[0];
63     info->nz_used        = isend[1];
64   } else if (flag == MAT_GLOBAL_MAX) {
65     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
66     info->nz_allocated   = irecv[0];
67     info->nz_used        = irecv[1];
68   } else if (flag == MAT_GLOBAL_SUM) {
69     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
70     info->nz_allocated   = irecv[0];
71     info->nz_used        = irecv[1];
72   }
73 
74   info->nz_unneeded       = 0;
75   info->assemblies        = A->num_ass;
76   info->mallocs           = 0;
77   info->memory            = ((PetscObject)A)->mem;
78   info->fill_ratio_given  = 0;
79   info->fill_ratio_needed = 0;
80   info->factor_mallocs    = 0;
81   PetscFunctionReturn(0);
82 }
83 
84 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
85 {
86   PetscFunctionBegin;
87   switch (op) {
88     case MAT_NEW_NONZERO_LOCATIONS:
89     case MAT_NEW_NONZERO_LOCATION_ERR:
90     case MAT_NEW_NONZERO_ALLOCATION_ERR:
91     case MAT_SYMMETRIC:
92     case MAT_SORTED_FULL:
93     case MAT_HERMITIAN:
94       break;
95     default:
96       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
102 {
103   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j;
106   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
107 
108   PetscFunctionBegin;
109   for (i=0;i<nr;i++) {
110     if (rows[i] < 0) continue;
111     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
112     for (j=0;j<nc;j++) {
113       if (cols[j] < 0) continue;
114       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
115       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
116       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
117         switch (imode) {
118           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
119           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
120           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
121         }
122       } else {
123         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
124         A->assembled = PETSC_FALSE;
125         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
126       }
127     }
128   }
129   PetscFunctionReturn(0);
130 }
131 
132 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
133 {
134   PetscErrorCode ierr;
135   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
136   PetscScalar    *x2d,*y2d,alpha=1.0;
137   const PetscInt *ranges;
138   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
139 
140   PetscFunctionBegin;
141   if (transpose) {
142 
143     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
144     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
145     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
146     xlld = PetscMax(1,A->rmap->n);
147     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
148     PetscCheckScaLapackInfo("descinit",info);
149     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
150     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
151     ylld = 1;
152     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
153     PetscCheckScaLapackInfo("descinit",info);
154 
155     /* allocate 2d vectors */
156     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
157     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
158     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
159     xlld = PetscMax(1,lszx);
160 
161     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
162     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
163     PetscCheckScaLapackInfo("descinit",info);
164     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
165     PetscCheckScaLapackInfo("descinit",info);
166 
167     /* redistribute x as a column of a 2d matrix */
168     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
169 
170     /* redistribute y as a row of a 2d matrix */
171     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
172 
173     /* call PBLAS subroutine */
174     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
175 
176     /* redistribute y from a row of a 2d matrix */
177     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
178 
179   } else {   /* non-transpose */
180 
181     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
182     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
183     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
184     xlld = 1;
185     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
186     PetscCheckScaLapackInfo("descinit",info);
187     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
188     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
189     ylld = PetscMax(1,A->rmap->n);
190     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
191     PetscCheckScaLapackInfo("descinit",info);
192 
193     /* allocate 2d vectors */
194     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
195     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
196     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
197     ylld = PetscMax(1,lszy);
198 
199     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
200     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
201     PetscCheckScaLapackInfo("descinit",info);
202     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
203     PetscCheckScaLapackInfo("descinit",info);
204 
205     /* redistribute x as a row of a 2d matrix */
206     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
207 
208     /* redistribute y as a column of a 2d matrix */
209     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
210 
211     /* call PBLAS subroutine */
212     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
213 
214     /* redistribute y from a column of a 2d matrix */
215     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
216 
217   }
218   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
219   PetscFunctionReturn(0);
220 }
221 
222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223 {
224   PetscErrorCode    ierr;
225   const PetscScalar *xarray;
226   PetscScalar       *yarray;
227 
228   PetscFunctionBegin;
229   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
230   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
231   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
232   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
233   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238 {
239   PetscErrorCode    ierr;
240   const PetscScalar *xarray;
241   PetscScalar       *yarray;
242 
243   PetscFunctionBegin;
244   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
245   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
246   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
247   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
248   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253 {
254   PetscErrorCode    ierr;
255   const PetscScalar *xarray;
256   PetscScalar       *zarray;
257 
258   PetscFunctionBegin;
259   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
260   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
262   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
263   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
264   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269 {
270   PetscErrorCode    ierr;
271   const PetscScalar *xarray;
272   PetscScalar       *zarray;
273 
274   PetscFunctionBegin;
275   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
276   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
277   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
278   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
279   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
280   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
285 {
286   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
287   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
288   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
289   PetscScalar   sone=1.0,zero=0.0;
290   PetscBLASInt  one=1;
291 
292   PetscFunctionBegin;
293   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
294   C->assembled = PETSC_TRUE;
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
299 {
300   PetscErrorCode ierr;
301 
302   PetscFunctionBegin;
303   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
304   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
305   ierr = MatSetUp(C);CHKERRQ(ierr);
306   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307   PetscFunctionReturn(0);
308 }
309 
310 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311 {
312   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315   PetscScalar   sone=1.0,zero=0.0;
316   PetscBLASInt  one=1;
317 
318   PetscFunctionBegin;
319   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
320   C->assembled = PETSC_TRUE;
321   PetscFunctionReturn(0);
322 }
323 
324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325 {
326   PetscErrorCode ierr;
327 
328   PetscFunctionBegin;
329   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
330   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
331   ierr = MatSetUp(C);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 /* --------------------------------------- */
336 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
337 {
338   PetscFunctionBegin;
339   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
340   C->ops->productsymbolic = MatProductSymbolic_AB;
341   PetscFunctionReturn(0);
342 }
343 
344 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
345 {
346   PetscFunctionBegin;
347   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348   C->ops->productsymbolic          = MatProductSymbolic_ABt;
349   PetscFunctionReturn(0);
350 }
351 
352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353 {
354   PetscErrorCode ierr;
355   Mat_Product    *product = C->product;
356 
357   PetscFunctionBegin;
358   switch (product->type) {
359     case MATPRODUCT_AB:
360       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
361       break;
362     case MATPRODUCT_ABt:
363       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
364       break;
365     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366   }
367   PetscFunctionReturn(0);
368 }
369 /* --------------------------------------- */
370 
371 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
372 {
373   PetscErrorCode    ierr;
374   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
375   PetscScalar       *darray,*d2d,v;
376   const PetscInt    *ranges;
377   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
378 
379   PetscFunctionBegin;
380   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
381 
382   if (A->rmap->N<=A->cmap->N) {   /* row version */
383 
384     /* create ScaLAPACK descriptor for vector (1d block distribution) */
385     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
386     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
387     dlld = PetscMax(1,A->rmap->n);
388     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
389     PetscCheckScaLapackInfo("descinit",info);
390 
391     /* allocate 2d vector */
392     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
393     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
394     dlld = PetscMax(1,lszd);
395 
396     /* create ScaLAPACK descriptor for vector (2d block distribution) */
397     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
398     PetscCheckScaLapackInfo("descinit",info);
399 
400     /* collect diagonal */
401     for (j=1;j<=a->M;j++) {
402       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
403       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
404     }
405 
406     /* redistribute d from a column of a 2d matrix */
407     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
408     ierr = PetscFree(d2d);CHKERRQ(ierr);
409 
410   } else {   /* column version */
411 
412     /* create ScaLAPACK descriptor for vector (1d block distribution) */
413     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
414     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
415     dlld = 1;
416     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
417     PetscCheckScaLapackInfo("descinit",info);
418 
419     /* allocate 2d vector */
420     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
421     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
422 
423     /* create ScaLAPACK descriptor for vector (2d block distribution) */
424     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
425     PetscCheckScaLapackInfo("descinit",info);
426 
427     /* collect diagonal */
428     for (j=1;j<=a->N;j++) {
429       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
430       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
431     }
432 
433     /* redistribute d from a row of a 2d matrix */
434     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
435     ierr = PetscFree(d2d);CHKERRQ(ierr);
436   }
437 
438   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
439   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
440   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
445 {
446   PetscErrorCode    ierr;
447   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
448   const PetscScalar *d;
449   const PetscInt    *ranges;
450   PetscScalar       *d2d;
451   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
452 
453   PetscFunctionBegin;
454   if (R) {
455     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
456     /* create ScaLAPACK descriptor for vector (1d block distribution) */
457     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
458     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
459     dlld = 1;
460     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
461     PetscCheckScaLapackInfo("descinit",info);
462 
463     /* allocate 2d vector */
464     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
465     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
466 
467     /* create ScaLAPACK descriptor for vector (2d block distribution) */
468     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
469     PetscCheckScaLapackInfo("descinit",info);
470 
471     /* redistribute d to a row of a 2d matrix */
472     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
473 
474     /* broadcast along process columns */
475     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
476     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
477 
478     /* local scaling */
479     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
480 
481     ierr = PetscFree(d2d);CHKERRQ(ierr);
482     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
483   }
484   if (L) {
485     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
486     /* create ScaLAPACK descriptor for vector (1d block distribution) */
487     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
488     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
489     dlld = PetscMax(1,A->rmap->n);
490     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
491     PetscCheckScaLapackInfo("descinit",info);
492 
493     /* allocate 2d vector */
494     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
495     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
496     dlld = PetscMax(1,lszd);
497 
498     /* create ScaLAPACK descriptor for vector (2d block distribution) */
499     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
500     PetscCheckScaLapackInfo("descinit",info);
501 
502     /* redistribute d to a column of a 2d matrix */
503     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
504 
505     /* broadcast along process rows */
506     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
507     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
508 
509     /* local scaling */
510     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
511 
512     ierr = PetscFree(d2d);CHKERRQ(ierr);
513     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
519 {
520   PetscFunctionBegin;
521   *missing = PETSC_FALSE;
522   PetscFunctionReturn(0);
523 }
524 
525 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
526 {
527   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
528   PetscBLASInt  n,one=1;
529 
530   PetscFunctionBegin;
531   n = x->lld*x->locc;
532   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
533   PetscFunctionReturn(0);
534 }
535 
536 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
537 {
538   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539   PetscBLASInt  i,n;
540   PetscScalar   v;
541 
542   PetscFunctionBegin;
543   n = PetscMin(x->M,x->N);
544   for (i=1;i<=n;i++) {
545     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
546     v += alpha;
547     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
553 {
554   PetscErrorCode ierr;
555   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
556   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
557   PetscBLASInt   one=1;
558   PetscScalar    beta=1.0;
559 
560   PetscFunctionBegin;
561   MatScaLAPACKCheckDistribution(Y,1,X,3);
562   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
563   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568 {
569   PetscErrorCode ierr;
570   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
571   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
572 
573   PetscFunctionBegin;
574   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
575   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
580 {
581   Mat            Bs;
582   MPI_Comm       comm;
583   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
584   PetscErrorCode ierr;
585 
586   PetscFunctionBegin;
587   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
588   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
589   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
590   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
591   b = (Mat_ScaLAPACK*)Bs->data;
592   b->M    = a->M;
593   b->N    = a->N;
594   b->mb   = a->mb;
595   b->nb   = a->nb;
596   b->rsrc = a->rsrc;
597   b->csrc = a->csrc;
598   ierr = MatSetUp(Bs);CHKERRQ(ierr);
599   *B = Bs;
600   if (op == MAT_COPY_VALUES) {
601     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
602   }
603   Bs->assembled = PETSC_TRUE;
604   PetscFunctionReturn(0);
605 }
606 
607 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
608 {
609   PetscErrorCode ierr;
610   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
611   Mat            Bs = *B;
612   PetscBLASInt   one=1;
613   PetscScalar    sone=1.0,zero=0.0;
614 #if defined(PETSC_USE_COMPLEX)
615   PetscInt       i,j;
616 #endif
617 
618   PetscFunctionBegin;
619   if (reuse == MAT_INITIAL_MATRIX) {
620     ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
621     *B = Bs;
622   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
623   b = (Mat_ScaLAPACK*)Bs->data;
624   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
625 #if defined(PETSC_USE_COMPLEX)
626   /* undo conjugation */
627   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
628 #endif
629   Bs->assembled = PETSC_TRUE;
630   PetscFunctionReturn(0);
631 }
632 
633 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
634 {
635   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
636   PetscInt      i,j;
637 
638   PetscFunctionBegin;
639   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
640   PetscFunctionReturn(0);
641 }
642 
643 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
644 {
645   PetscErrorCode ierr;
646   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
647   Mat            Bs = *B;
648   PetscBLASInt   one=1;
649   PetscScalar    sone=1.0,zero=0.0;
650 
651   PetscFunctionBegin;
652   if (reuse == MAT_INITIAL_MATRIX) {
653     ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
654     *B = Bs;
655   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
656   b = (Mat_ScaLAPACK*)Bs->data;
657   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
658   Bs->assembled = PETSC_TRUE;
659   PetscFunctionReturn(0);
660 }
661 
662 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
663 {
664   PetscErrorCode ierr;
665   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
666   PetscScalar    *x,*x2d;
667   const PetscInt *ranges;
668   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
669 
670   PetscFunctionBegin;
671   ierr = VecCopy(B,X);CHKERRQ(ierr);
672   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
673 
674   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
675   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
676   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
677   xlld = PetscMax(1,A->rmap->n);
678   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
679   PetscCheckScaLapackInfo("descinit",info);
680 
681   /* allocate 2d vector */
682   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
683   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
684   xlld = PetscMax(1,lszx);
685 
686   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
687   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
688   PetscCheckScaLapackInfo("descinit",info);
689 
690   /* redistribute x as a column of a 2d matrix */
691   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
692 
693   /* call ScaLAPACK subroutine */
694   switch (A->factortype) {
695     case MAT_FACTOR_LU:
696       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
697       PetscCheckScaLapackInfo("getrs",info);
698       break;
699     case MAT_FACTOR_CHOLESKY:
700       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
701       PetscCheckScaLapackInfo("potrs",info);
702       break;
703     default:
704       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
705   }
706 
707   /* redistribute x from a column of a 2d matrix */
708   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
709 
710   ierr = PetscFree(x2d);CHKERRQ(ierr);
711   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
716 {
717   PetscErrorCode ierr;
718 
719   PetscFunctionBegin;
720   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
721   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
726 {
727   PetscErrorCode ierr;
728   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
729   PetscBool      flg1,flg2;
730   PetscBLASInt   one=1,info;
731 
732   PetscFunctionBegin;
733   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
734   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
735   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
736   MatScaLAPACKCheckDistribution(B,1,X,2);
737   b = (Mat_ScaLAPACK*)B->data;
738   x = (Mat_ScaLAPACK*)X->data;
739   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
740 
741   switch (A->factortype) {
742     case MAT_FACTOR_LU:
743       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
744       PetscCheckScaLapackInfo("getrs",info);
745       break;
746     case MAT_FACTOR_CHOLESKY:
747       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
748       PetscCheckScaLapackInfo("potrs",info);
749       break;
750     default:
751       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
757 {
758   PetscErrorCode ierr;
759   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
760   PetscBLASInt   one=1,info;
761 
762   PetscFunctionBegin;
763   if (!a->pivots) {
764     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
765     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
766   }
767   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
768   PetscCheckScaLapackInfo("getrf",info);
769   A->factortype = MAT_FACTOR_LU;
770   A->assembled  = PETSC_TRUE;
771 
772   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
773   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
778 {
779   PetscErrorCode ierr;
780 
781   PetscFunctionBegin;
782   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
783   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
787 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
788 {
789   PetscFunctionBegin;
790   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
791   PetscFunctionReturn(0);
792 }
793 
794 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
795 {
796   PetscErrorCode ierr;
797   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
798   PetscBLASInt   one=1,info;
799 
800   PetscFunctionBegin;
801   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
802   PetscCheckScaLapackInfo("potrf",info);
803   A->factortype = MAT_FACTOR_CHOLESKY;
804   A->assembled  = PETSC_TRUE;
805 
806   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
807   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
808   PetscFunctionReturn(0);
809 }
810 
811 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
812 {
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
817   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
822 {
823   PetscFunctionBegin;
824   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
825   PetscFunctionReturn(0);
826 }
827 
828 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
829 {
830   PetscFunctionBegin;
831   *type = MATSOLVERSCALAPACK;
832   PetscFunctionReturn(0);
833 }
834 
835 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
836 {
837   Mat            B;
838   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* Create the factorization matrix */
843   ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr);
844   B->trivialsymbolic = PETSC_TRUE;
845   B->factortype = ftype;
846   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
847   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
848 
849   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
850   *F = B;
851   PetscFunctionReturn(0);
852 }
853 
854 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
855 {
856   PetscErrorCode ierr;
857 
858   PetscFunctionBegin;
859   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
865 {
866   PetscErrorCode ierr;
867   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
868   PetscBLASInt   one=1,lwork=0;
869   const char     *ntype;
870   PetscScalar    *work=NULL,dummy;
871 
872   PetscFunctionBegin;
873   switch (type){
874     case NORM_1:
875       ntype = "1";
876       lwork = PetscMax(a->locr,a->locc);
877       break;
878     case NORM_FROBENIUS:
879       ntype = "F";
880       work  = &dummy;
881       break;
882     case NORM_INFINITY:
883       ntype = "I";
884       lwork = PetscMax(a->locr,a->locc);
885       break;
886     default:
887       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
888   }
889   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
890   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
891   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
892   PetscFunctionReturn(0);
893 }
894 
895 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
896 {
897   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
906 {
907   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
908   PetscErrorCode ierr;
909   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
910 
911   PetscFunctionBegin;
912   if (rows) {
913     n     = a->locr;
914     nb    = a->mb;
915     isrc  = a->rsrc;
916     nproc = a->grid->nprow;
917     iproc = a->grid->myrow;
918     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
919     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
920     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
921   }
922   if (cols) {
923     n     = a->locc;
924     nb    = a->nb;
925     isrc  = a->csrc;
926     nproc = a->grid->npcol;
927     iproc = a->grid->mycol;
928     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
929     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
930     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
936 {
937   PetscErrorCode ierr;
938   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
939   Mat            Bmpi;
940   MPI_Comm       comm;
941   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
942   const PetscInt *ranges,*branges,*cwork;
943   const PetscScalar *vwork;
944   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
945   PetscScalar    *barray;
946   PetscBool      differ=PETSC_FALSE;
947   PetscMPIInt    size;
948 
949   PetscFunctionBegin;
950   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
951   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
952 
953   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
954     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
955     ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
956     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
957   }
958 
959   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
960     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
961     m = PETSC_DECIDE;
962     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
963     n = PETSC_DECIDE;
964     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
965     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
966     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
967     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
968 
969     /* create ScaLAPACK descriptor for B (1d block distribution) */
970     ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
971     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
972     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
973     PetscCheckScaLapackInfo("descinit",info);
974 
975     /* redistribute matrix */
976     ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
977     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
978     ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
979     ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980     ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981 
982     /* transfer rows of auxiliary matrix to the final matrix B */
983     ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
984     for (i=rstart;i<rend;i++) {
985       ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
986       ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
987       ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
988     }
989     ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990     ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);
992 
993   } else {  /* normal cases */
994 
995     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
996     else {
997       ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
998       m = PETSC_DECIDE;
999       ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1000       n = PETSC_DECIDE;
1001       ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1002       ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1003       ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
1004       ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1005     }
1006 
1007     /* create ScaLAPACK descriptor for B (1d block distribution) */
1008     ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
1009     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1010     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
1011     PetscCheckScaLapackInfo("descinit",info);
1012 
1013     /* redistribute matrix */
1014     ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
1015     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1016     ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
1017 
1018     ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1019     ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1020     if (reuse == MAT_INPLACE_MATRIX) {
1021       ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1022     } else *B = Bmpi;
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1028 {
1029   PetscErrorCode ierr;
1030   Mat_ScaLAPACK  *b;
1031   Mat            Bmpi;
1032   MPI_Comm       comm;
1033   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1034   const PetscInt *ranges;
1035   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1036   PetscScalar    *aarray;
1037   PetscInt       lda;
1038 
1039   PetscFunctionBegin;
1040   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1041 
1042   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1043   else {
1044     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
1045     m = PETSC_DECIDE;
1046     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1047     n = PETSC_DECIDE;
1048     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1049     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1050     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
1051     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1052   }
1053   b = (Mat_ScaLAPACK*)Bmpi->data;
1054 
1055   /* create ScaLAPACK descriptor for A (1d block distribution) */
1056   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
1057   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
1058   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1059   lld = PetscMax(lda,1);  /* local leading dimension */
1060   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1061   PetscCheckScaLapackInfo("descinit",info);
1062 
1063   /* redistribute matrix */
1064   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1065   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1066   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1067 
1068   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1069   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1070   if (reuse == MAT_INPLACE_MATRIX) {
1071     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1072   } else *B = Bmpi;
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1077 {
1078   Mat               mat_scal;
1079   PetscErrorCode    ierr;
1080   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1081   const PetscInt    *cols;
1082   const PetscScalar *vals;
1083 
1084   PetscFunctionBegin;
1085   if (reuse == MAT_REUSE_MATRIX) {
1086     mat_scal = *newmat;
1087     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1088   } else {
1089     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1090     m = PETSC_DECIDE;
1091     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1092     n = PETSC_DECIDE;
1093     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1094     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1095     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1096     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1097   }
1098   for (row=rstart;row<rend;row++) {
1099     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1100     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1101     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1102   }
1103   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1104   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1105 
1106   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1107   else *newmat = mat_scal;
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1112 {
1113   Mat               mat_scal;
1114   PetscErrorCode    ierr;
1115   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1116   const PetscInt    *cols;
1117   const PetscScalar *vals;
1118   PetscScalar       v;
1119 
1120   PetscFunctionBegin;
1121   if (reuse == MAT_REUSE_MATRIX) {
1122     mat_scal = *newmat;
1123     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1124   } else {
1125     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1126     m = PETSC_DECIDE;
1127     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1128     n = PETSC_DECIDE;
1129     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1130     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1131     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1132     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1133   }
1134   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1135   for (row=rstart;row<rend;row++) {
1136     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1137     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1138     for (j=0;j<ncols;j++) { /* lower triangular part */
1139       if (cols[j] == row) continue;
1140       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1141       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1142     }
1143     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1144   }
1145   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1146   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1147   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1148 
1149   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1150   else *newmat = mat_scal;
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1155 {
1156   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1157   PetscErrorCode ierr;
1158   PetscInt       sz=0;
1159 
1160   PetscFunctionBegin;
1161   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1162   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1163   if (!a->lld) a->lld = a->locr;
1164 
1165   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1166   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1167   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1168   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1169 
1170   A->preallocated = PETSC_TRUE;
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1175 {
1176   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1177   PetscErrorCode     ierr;
1178   Mat_ScaLAPACK_Grid *grid;
1179   PetscBool          flg;
1180   MPI_Comm           icomm;
1181 
1182   PetscFunctionBegin;
1183   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1184   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1185   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1186   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1187   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1188   if (--grid->grid_refct == 0) {
1189     Cblacs_gridexit(grid->ictxt);
1190     Cblacs_gridexit(grid->ictxrow);
1191     Cblacs_gridexit(grid->ictxcol);
1192     ierr = PetscFree(grid);CHKERRQ(ierr);
1193     ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
1194   }
1195   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1199   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1200   ierr = PetscFree(A->data);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1205 {
1206   PetscErrorCode ierr;
1207   const PetscInt *ranges;
1208   PetscMPIInt    size;
1209   PetscInt       i,n;
1210 
1211   PetscFunctionBegin;
1212   ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr);
1213   if (size>2) {
1214     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1215     n = ranges[1]-ranges[0];
1216     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1217     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1218   }
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1223 {
1224   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1225   PetscErrorCode ierr;
1226   PetscBLASInt   info=0;
1227 
1228   PetscFunctionBegin;
1229   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1230   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1231 
1232   /* check that the layout is as enforced by MatCreateScaLAPACK */
1233   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1234   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1235 
1236   /* compute local sizes */
1237   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1238   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1239   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1240   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1241   a->lld  = PetscMax(1,a->locr);
1242 
1243   /* allocate local array */
1244   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1245 
1246   /* set up ScaLAPACK descriptor */
1247   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1248   PetscCheckScaLapackInfo("descinit",info);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1253 {
1254   PetscErrorCode ierr;
1255   PetscInt       nstash,reallocs;
1256 
1257   PetscFunctionBegin;
1258   if (A->nooffprocentries) PetscFunctionReturn(0);
1259   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1260   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1261   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1266 {
1267   PetscErrorCode ierr;
1268   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1269   PetscMPIInt    n;
1270   PetscInt       i,flg,*row,*col;
1271   PetscScalar    *val;
1272   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1273 
1274   PetscFunctionBegin;
1275   if (A->nooffprocentries) PetscFunctionReturn(0);
1276   while (1) {
1277     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1278     if (!flg) break;
1279     for (i=0;i<n;i++) {
1280       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1281       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1282       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1283       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
1284       switch (A->insertmode) {
1285         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1286         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1287         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1288       }
1289     }
1290   }
1291   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1296 {
1297   PetscErrorCode ierr;
1298   Mat            Adense,As;
1299   MPI_Comm       comm;
1300 
1301   PetscFunctionBegin;
1302   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1303   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1304   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1305   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1306   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1307   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1308   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 /* -------------------------------------------------------------------*/
1313 static struct _MatOps MatOps_Values = {
1314        MatSetValues_ScaLAPACK,
1315        0,
1316        0,
1317        MatMult_ScaLAPACK,
1318 /* 4*/ MatMultAdd_ScaLAPACK,
1319        MatMultTranspose_ScaLAPACK,
1320        MatMultTransposeAdd_ScaLAPACK,
1321        MatSolve_ScaLAPACK,
1322        MatSolveAdd_ScaLAPACK,
1323        0,
1324 /*10*/ 0,
1325        MatLUFactor_ScaLAPACK,
1326        MatCholeskyFactor_ScaLAPACK,
1327        0,
1328        MatTranspose_ScaLAPACK,
1329 /*15*/ MatGetInfo_ScaLAPACK,
1330        0,
1331        MatGetDiagonal_ScaLAPACK,
1332        MatDiagonalScale_ScaLAPACK,
1333        MatNorm_ScaLAPACK,
1334 /*20*/ MatAssemblyBegin_ScaLAPACK,
1335        MatAssemblyEnd_ScaLAPACK,
1336        MatSetOption_ScaLAPACK,
1337        MatZeroEntries_ScaLAPACK,
1338 /*24*/ 0,
1339        MatLUFactorSymbolic_ScaLAPACK,
1340        MatLUFactorNumeric_ScaLAPACK,
1341        MatCholeskyFactorSymbolic_ScaLAPACK,
1342        MatCholeskyFactorNumeric_ScaLAPACK,
1343 /*29*/ MatSetUp_ScaLAPACK,
1344        0,
1345        0,
1346        0,
1347        0,
1348 /*34*/ MatDuplicate_ScaLAPACK,
1349        0,
1350        0,
1351        0,
1352        0,
1353 /*39*/ MatAXPY_ScaLAPACK,
1354        0,
1355        0,
1356        0,
1357        MatCopy_ScaLAPACK,
1358 /*44*/ 0,
1359        MatScale_ScaLAPACK,
1360        MatShift_ScaLAPACK,
1361        0,
1362        0,
1363 /*49*/ 0,
1364        0,
1365        0,
1366        0,
1367        0,
1368 /*54*/ 0,
1369        0,
1370        0,
1371        0,
1372        0,
1373 /*59*/ 0,
1374        MatDestroy_ScaLAPACK,
1375        MatView_ScaLAPACK,
1376        0,
1377        0,
1378 /*64*/ 0,
1379        0,
1380        0,
1381        0,
1382        0,
1383 /*69*/ 0,
1384        0,
1385        MatConvert_ScaLAPACK_Dense,
1386        0,
1387        0,
1388 /*74*/ 0,
1389        0,
1390        0,
1391        0,
1392        0,
1393 /*79*/ 0,
1394        0,
1395        0,
1396        0,
1397        MatLoad_ScaLAPACK,
1398 /*84*/ 0,
1399        0,
1400        0,
1401        0,
1402        0,
1403 /*89*/ 0,
1404        0,
1405        MatMatMultNumeric_ScaLAPACK,
1406        0,
1407        0,
1408 /*94*/ 0,
1409        0,
1410        0,
1411        MatMatTransposeMultNumeric_ScaLAPACK,
1412        0,
1413 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1414        0,
1415        0,
1416        MatConjugate_ScaLAPACK,
1417        0,
1418 /*104*/0,
1419        0,
1420        0,
1421        0,
1422        0,
1423 /*109*/MatMatSolve_ScaLAPACK,
1424        0,
1425        0,
1426        0,
1427        MatMissingDiagonal_ScaLAPACK,
1428 /*114*/0,
1429        0,
1430        0,
1431        0,
1432        0,
1433 /*119*/0,
1434        MatHermitianTranspose_ScaLAPACK,
1435        0,
1436        0,
1437        0,
1438 /*124*/0,
1439        0,
1440        0,
1441        0,
1442        0,
1443 /*129*/0,
1444        0,
1445        0,
1446        0,
1447        0,
1448 /*134*/0,
1449        0,
1450        0,
1451        0,
1452        0,
1453        0,
1454 /*140*/0,
1455        0,
1456        0,
1457        0,
1458        0,
1459 /*145*/0,
1460        0,
1461        0
1462 };
1463 
1464 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1465 {
1466   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1467   PetscInt           size=stash->size,nsends;
1468   PetscErrorCode     ierr;
1469   PetscInt           count,*sindices,**rindices,i,j,l;
1470   PetscScalar        **rvalues,*svalues;
1471   MPI_Comm           comm = stash->comm;
1472   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1473   PetscMPIInt        *sizes,*nlengths,nreceives;
1474   PetscInt           *sp_idx,*sp_idy;
1475   PetscScalar        *sp_val;
1476   PetscMatStashSpace space,space_next;
1477   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1478   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1479 
1480   PetscFunctionBegin;
1481   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1482     InsertMode addv;
1483     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1484     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1485     mat->insertmode = addv; /* in case this processor had no cache */
1486   }
1487 
1488   bs2 = stash->bs*stash->bs;
1489 
1490   /*  first count number of contributors to each processor */
1491   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1492   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1493 
1494   i     = j    = 0;
1495   space = stash->space_head;
1496   while (space) {
1497     space_next = space->next;
1498     for (l=0; l<space->local_used; l++) {
1499       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1500       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1501       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1502       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1503       nlengths[j]++; owner[i] = j;
1504       i++;
1505     }
1506     space = space_next;
1507   }
1508 
1509   /* Now check what procs get messages - and compute nsends. */
1510   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1511   for (i=0, nsends=0; i<size; i++) {
1512     if (nlengths[i]) {
1513       sizes[i] = 1; nsends++;
1514     }
1515   }
1516 
1517   {PetscMPIInt *onodes,*olengths;
1518    /* Determine the number of messages to expect, their lengths, from from-ids */
1519    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1520    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1521    /* since clubbing row,col - lengths are multiplied by 2 */
1522    for (i=0; i<nreceives; i++) olengths[i] *=2;
1523    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1524    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1525    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1526    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1527    ierr = PetscFree(onodes);CHKERRQ(ierr);
1528    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1529 
1530   /* do sends:
1531       1) starts[i] gives the starting index in svalues for stuff going to
1532          the ith processor
1533   */
1534   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1535   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1536   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1537   /* use 2 sends the first with all_a, the next with all_i and all_j */
1538   startv[0] = 0; starti[0] = 0;
1539   for (i=1; i<size; i++) {
1540     startv[i] = startv[i-1] + nlengths[i-1];
1541     starti[i] = starti[i-1] + 2*nlengths[i-1];
1542   }
1543 
1544   i     = 0;
1545   space = stash->space_head;
1546   while (space) {
1547     space_next = space->next;
1548     sp_idx     = space->idx;
1549     sp_idy     = space->idy;
1550     sp_val     = space->val;
1551     for (l=0; l<space->local_used; l++) {
1552       j = owner[i];
1553       if (bs2 == 1) {
1554         svalues[startv[j]] = sp_val[l];
1555       } else {
1556         PetscInt    k;
1557         PetscScalar *buf1,*buf2;
1558         buf1 = svalues+bs2*startv[j];
1559         buf2 = space->val + bs2*l;
1560         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1561       }
1562       sindices[starti[j]]             = sp_idx[l];
1563       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1564       startv[j]++;
1565       starti[j]++;
1566       i++;
1567     }
1568     space = space_next;
1569   }
1570   startv[0] = 0;
1571   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1572 
1573   for (i=0,count=0; i<size; i++) {
1574     if (sizes[i]) {
1575       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
1576       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
1577     }
1578   }
1579 #if defined(PETSC_USE_INFO)
1580   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1581   for (i=0; i<size; i++) {
1582     if (sizes[i]) {
1583       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1584     }
1585   }
1586 #endif
1587   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1588   ierr = PetscFree(owner);CHKERRQ(ierr);
1589   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1590   ierr = PetscFree(sizes);CHKERRQ(ierr);
1591 
1592   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1593   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1594 
1595   for (i=0; i<nreceives; i++) {
1596     recv_waits[2*i]   = recv_waits1[i];
1597     recv_waits[2*i+1] = recv_waits2[i];
1598   }
1599   stash->recv_waits = recv_waits;
1600 
1601   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1602   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1603 
1604   stash->svalues         = svalues;
1605   stash->sindices        = sindices;
1606   stash->rvalues         = rvalues;
1607   stash->rindices        = rindices;
1608   stash->send_waits      = send_waits;
1609   stash->nsends          = nsends;
1610   stash->nrecvs          = nreceives;
1611   stash->reproduce_count = 0;
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1616 {
1617   PetscErrorCode ierr;
1618   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1619 
1620   PetscFunctionBegin;
1621   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1622   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1623   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1624   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1625   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 /*@
1630    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1631    the ScaLAPACK matrix
1632 
1633    Logically Collective on A
1634 
1635    Input Parameter:
1636 +  A  - a MATSCALAPACK matrix
1637 .  mb - the row block size
1638 -  nb - the column block size
1639 
1640    Level: intermediate
1641 
1642 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1643 @*/
1644 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1645 {
1646   PetscErrorCode ierr;
1647 
1648   PetscFunctionBegin;
1649   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1650   PetscValidLogicalCollectiveInt(A,mb,2);
1651   PetscValidLogicalCollectiveInt(A,nb,3);
1652   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1657 {
1658   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1659 
1660   PetscFunctionBegin;
1661   if (mb) *mb = a->mb;
1662   if (nb) *nb = a->nb;
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 /*@
1667    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1668    the ScaLAPACK matrix
1669 
1670    Not collective
1671 
1672    Input Parameter:
1673 .  A  - a MATSCALAPACK matrix
1674 
1675    Output Parameters:
1676 +  mb - the row block size
1677 -  nb - the column block size
1678 
1679    Level: intermediate
1680 
1681 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1682 @*/
1683 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1684 {
1685   PetscErrorCode ierr;
1686 
1687   PetscFunctionBegin;
1688   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1689   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1694 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1695 
1696 /*MC
1697    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1698 
1699    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1700 
1701    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1702 
1703    Options Database Keys:
1704 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1705 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1706 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1707 
1708    Level: beginner
1709 
1710 .seealso: MATDENSE, MATELEMENTAL
1711 M*/
1712 
1713 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1714 {
1715   Mat_ScaLAPACK      *a;
1716   PetscErrorCode     ierr;
1717   PetscBool          flg,flg1;
1718   Mat_ScaLAPACK_Grid *grid;
1719   MPI_Comm           icomm;
1720   PetscBLASInt       nprow,npcol,myrow,mycol;
1721   PetscInt           optv1,k=2,array[2]={0,0};
1722   PetscMPIInt        size;
1723 
1724   PetscFunctionBegin;
1725   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1726   A->insertmode = NOT_SET_VALUES;
1727 
1728   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1729   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1730   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1731   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1732   A->stash.ScatterDestroy = NULL;
1733 
1734   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1735   A->data = (void*)a;
1736 
1737   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1738   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1739     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
1740     ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
1741   }
1742   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1743   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1744   if (!flg) {
1745     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1746 
1747     ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
1748     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1749 
1750     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1751     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1752     if (flg1) {
1753       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1754       grid->nprow = optv1;
1755     }
1756     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1757 
1758     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1759     grid->npcol = size/grid->nprow;
1760     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1761     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1762     grid->ictxt = Csys2blacs_handle(icomm);
1763     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1764     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1765     grid->grid_refct = 1;
1766     grid->nprow      = nprow;
1767     grid->npcol      = npcol;
1768     grid->myrow      = myrow;
1769     grid->mycol      = mycol;
1770     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1771     grid->ictxrow = Csys2blacs_handle(icomm);
1772     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1773     grid->ictxcol = Csys2blacs_handle(icomm);
1774     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1775     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);
1776 
1777   } else grid->grid_refct++;
1778   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1779   a->grid = grid;
1780   a->mb   = DEFAULT_BLOCKSIZE;
1781   a->nb   = DEFAULT_BLOCKSIZE;
1782 
1783   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1784   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1785   if (flg) {
1786     a->mb = array[0];
1787     a->nb = (k>1)? array[1]: a->mb;
1788   }
1789   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1790 
1791   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1794   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 /*@C
1799    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1800    (2D block cyclic distribution).
1801 
1802    Collective
1803 
1804    Input Parameters:
1805 +  comm - MPI communicator
1806 .  mb   - row block size (or PETSC_DECIDE to have it set)
1807 .  nb   - column block size (or PETSC_DECIDE to have it set)
1808 .  M    - number of global rows
1809 .  N    - number of global columns
1810 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1811 -  csrc - coordinate of process that owns the first column of the distributed matrix
1812 
1813    Output Parameter:
1814 .  A - the matrix
1815 
1816    Options Database Keys:
1817 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1818 
1819    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1820    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1821    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1822 
1823    Notes:
1824    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1825    is chosen.
1826 
1827    Storage Information:
1828    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1829    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1830    significance and are thus ignored. The block sizes refer to the values
1831    used for the distributed matrix, not the same meaning as in BAIJ.
1832 
1833    Level: intermediate
1834 
1835 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1836 @*/
1837 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1838 {
1839   PetscErrorCode ierr;
1840   Mat_ScaLAPACK  *a;
1841   PetscInt       m,n;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1845   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1846   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1847   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1848   m = PETSC_DECIDE;
1849   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1850   n = PETSC_DECIDE;
1851   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1852   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1853   a = (Mat_ScaLAPACK*)(*A)->data;
1854   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1855   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1856   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1857   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1858   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1859   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1860   ierr = MatSetUp(*A);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 
1864