xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 7d1b7f08ecf63081ec25c2ae9bbc0564ea4e8a8e)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
17   ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
22 {
23   PetscErrorCode    ierr;
24   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
25   PetscBool         iascii;
26   PetscViewerFormat format;
27   Mat               Adense;
28 
29   PetscFunctionBegin;
30   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
31   if (iascii) {
32     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
33     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
34       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
35       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
36       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
37       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
38       PetscFunctionReturn(0);
39     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
40       PetscFunctionReturn(0);
41     }
42   }
43   /* convert to dense format and call MatView() */
44   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
45   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
46   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
51 {
52   PetscErrorCode ierr;
53   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
54   PetscLogDouble isend[2],irecv[2];
55 
56   PetscFunctionBegin;
57   info->block_size = 1.0;
58 
59   isend[0] = a->lld*a->locc;     /* locally allocated */
60   isend[1] = a->locr*a->locc;    /* used submatrix */
61   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
62     info->nz_allocated   = isend[0];
63     info->nz_used        = isend[1];
64   } else if (flag == MAT_GLOBAL_MAX) {
65     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
66     info->nz_allocated   = irecv[0];
67     info->nz_used        = irecv[1];
68   } else if (flag == MAT_GLOBAL_SUM) {
69     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
70     info->nz_allocated   = irecv[0];
71     info->nz_used        = irecv[1];
72   }
73 
74   info->nz_unneeded       = 0;
75   info->assemblies        = A->num_ass;
76   info->mallocs           = 0;
77   info->memory            = ((PetscObject)A)->mem;
78   info->fill_ratio_given  = 0;
79   info->fill_ratio_needed = 0;
80   info->factor_mallocs    = 0;
81   PetscFunctionReturn(0);
82 }
83 
84 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
85 {
86   PetscFunctionBegin;
87   switch (op) {
88     case MAT_NEW_NONZERO_LOCATIONS:
89     case MAT_NEW_NONZERO_LOCATION_ERR:
90     case MAT_NEW_NONZERO_ALLOCATION_ERR:
91     case MAT_SYMMETRIC:
92     case MAT_SORTED_FULL:
93     case MAT_HERMITIAN:
94       break;
95     default:
96       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
97   }
98   PetscFunctionReturn(0);
99 }
100 
/* Insert or add an nr-by-nc logical block of values (vals is row-major).
   Locally owned entries are written directly into the 2d block-cyclic array;
   off-process entries are stashed until MatAssemblyBegin/End.  Negative row
   or column indices are skipped. */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      /* map global (row,col) to local indices and owning process (rsrc,csrc) */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* owned here: write into the column-major local array (leading dim lld) */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: queue it in the stash for the assembly phase */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
131 
/*
   MatMultXXXYYY_ScaLAPACK - common kernel for the MatMult variants.

   Computes y = op(A)*x + beta*y with op(A) = A (transpose=PETSC_FALSE) or
   A^T (transpose=PETSC_TRUE).  PETSc vectors are 1d block-distributed, so
   they are first redistributed to the matrix's 2d block-cyclic layout,
   PBLAS gemv is applied, and y is redistributed back.  With beta==0, y is
   treated as write-only and its initial redistribution is skipped.
   Note: ranges[1] (end of rank 0's ownership range) is used as the uniform
   1d block size for the vector descriptors.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when accumulating) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when accumulating) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
221 
222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223 {
224   PetscErrorCode    ierr;
225   const PetscScalar *xarray;
226   PetscScalar       *yarray;
227 
228   PetscFunctionBegin;
229   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
230   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
231   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
232   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
233   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238 {
239   PetscErrorCode    ierr;
240   const PetscScalar *xarray;
241   PetscScalar       *yarray;
242 
243   PetscFunctionBegin;
244   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
245   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
246   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
247   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
248   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253 {
254   PetscErrorCode    ierr;
255   const PetscScalar *xarray;
256   PetscScalar       *zarray;
257 
258   PetscFunctionBegin;
259   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
260   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
262   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
263   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
264   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269 {
270   PetscErrorCode    ierr;
271   const PetscScalar *xarray;
272   PetscScalar       *zarray;
273 
274   PetscFunctionBegin;
275   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
276   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
277   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
278   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
279   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
280   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
285 {
286   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
287   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
288   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
289   PetscScalar   sone=1.0,zero=0.0;
290   PetscBLASInt  one=1;
291 
292   PetscFunctionBegin;
293   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
294   C->assembled = PETSC_TRUE;
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
299 {
300   PetscErrorCode ierr;
301 
302   PetscFunctionBegin;
303   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
304   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
305   ierr = MatSetUp(C);CHKERRQ(ierr);
306   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307   PetscFunctionReturn(0);
308 }
309 
310 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311 {
312   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315   PetscScalar   sone=1.0,zero=0.0;
316   PetscBLASInt  one=1;
317 
318   PetscFunctionBegin;
319   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
320   C->assembled = PETSC_TRUE;
321   PetscFunctionReturn(0);
322 }
323 
324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325 {
326   PetscErrorCode ierr;
327 
328   PetscFunctionBegin;
329   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
330   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
331   ierr = MatSetUp(C);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 /* --------------------------------------- */
/* Configure C for the AB product: install the ScaLAPACK symbolic routine and
   the generic MatProductSymbolic_AB driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
343 
/* Configure C for the ABt product: install the ScaLAPACK symbolic routine and
   the generic MatProductSymbolic_ABt driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
351 
352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353 {
354   PetscErrorCode ierr;
355   Mat_Product    *product = C->product;
356 
357   PetscFunctionBegin;
358   switch (product->type) {
359     case MATPRODUCT_AB:
360       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
361       break;
362     case MATPRODUCT_ABt:
363       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
364       break;
365     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366   }
367   PetscFunctionReturn(0);
368 }
369 /* --------------------------------------- */
370 
/*
   MatGetDiagonal_ScaLAPACK - extract the diagonal into the 1d-distributed
   vector D.  The diagonal is first gathered element-by-element into a 2d
   block-cyclic vector matching A's layout, then redistributed with gemr2d.
   The shorter dimension decides whether the diagonal is treated as a column
   (row version, M <= N) or a row (column version, M > N) of a 2d matrix.
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: elset only touches locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: elget broadcasts along rows ("R"), elset stores locally */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: elset only touches locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: elget broadcasts along columns ("C") */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
443 
/*
   MatDiagonalScale_ScaLAPACK - compute A = diag(L)*A*diag(R).

   Either vector may be NULL (skip that side).  Each scaling vector is
   redistributed from its 1d PETSc layout to a row/column of the 2d grid,
   broadcast across the other grid dimension with BLACS, and applied entrywise
   to the local block.
   NOTE(review): Cdgebs2d/Cdgebr2d broadcast double-precision data with an
   element count of lszd; for complex PetscScalar builds, confirm that the
   element count/datatype matches the PetscScalar array -- TODO.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    /* right scaling: multiply column j of the local block by d2d[j] */
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, the others receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {
    /* left scaling: multiply row i of the local block by d2d[i] */
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, the others receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
517 
/* Dense storage holds every entry explicitly, so no diagonal entry can be
   missing; d is intentionally left unset since there is no missing index. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
524 
525 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
526 {
527   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
528   PetscBLASInt  n,one=1;
529 
530   PetscFunctionBegin;
531   n = x->lld*x->locc;
532   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
533   PetscFunctionReturn(0);
534 }
535 
536 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
537 {
538   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539   PetscBLASInt  i,n;
540   PetscScalar   v;
541 
542   PetscFunctionBegin;
543   n = PetscMin(x->M,x->N);
544   for (i=1;i<=n;i++) {
545     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
546     v += alpha;
547     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
553 {
554   PetscErrorCode ierr;
555   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
556   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
557   PetscBLASInt   one=1;
558   PetscScalar    beta=1.0;
559 
560   PetscFunctionBegin;
561   MatScaLAPACKCheckDistribution(Y,1,X,3);
562   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
563   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568 {
569   PetscErrorCode ierr;
570   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
571   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
572 
573   PetscFunctionBegin;
574   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
575   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
/* Duplicate a ScaLAPACK matrix, preserving its distribution parameters.
   The blocking/source fields are copied into the new Mat_ScaLAPACK BEFORE
   MatSetUp so the setup reproduces A's layout exactly; with MAT_COPY_VALUES
   the raw local array is then copied verbatim. */
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
  ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's 2d block-cyclic parameters before setup */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  ierr = MatSetUp(Bs);CHKERRQ(ierr);
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    /* same layout, so a flat copy of the local array suffices */
    ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
606 
/* B = A^T.  Only MAT_INITIAL_MATRIX is supported: B is created with A's block
   sizes and source coordinates swapped.  In complex builds the PBLAStran_
   binding conjugates as well (see the "undo conjugation" loop below), so the
   entries are re-conjugated to yield a plain transpose. */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* note swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
632 
633 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
634 {
635   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
636   PetscInt      i,j;
637 
638   PetscFunctionBegin;
639   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
640   PetscFunctionReturn(0);
641 }
642 
/* B = A^H.  Only MAT_INITIAL_MATRIX is supported.  Unlike
   MatTranspose_ScaLAPACK, the conjugation performed by the PBLAStran_
   binding in complex builds is kept (no undo loop), yielding the
   conjugate transpose directly. */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* note swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
661 
/*
   MatSolve_ScaLAPACK - solve A*X = B for a factored A.

   B is copied into X, X's array is redistributed from its 1d layout to a
   column of a 2d block-cyclic matrix, the appropriate triangular solve
   (getrs for LU, potrs for Cholesky) is applied in place, and the result is
   redistributed back into X.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine appropriate to the stored factorization */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
714 
715 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
716 {
717   PetscErrorCode ierr;
718 
719   PetscFunctionBegin;
720   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
721   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
/* Solve A*X = B with multiple right-hand sides.  B and X must both be
   MATSCALAPACK with the same distribution as each other; B's local data is
   copied into X and the factored solve (getrs/potrs) runs in place on X. */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
  if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  /* start from B's values; the solve overwrites X's local array in place */
  ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
755 
756 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
757 {
758   PetscErrorCode ierr;
759   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
760   PetscBLASInt   one=1,info;
761 
762   PetscFunctionBegin;
763   if (!a->pivots) {
764     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
765     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
766   }
767   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
768   PetscCheckScaLapackInfo("getrf",info);
769   A->factortype = MAT_FACTOR_LU;
770   A->assembled  = PETSC_TRUE;
771 
772   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
773   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
778 {
779   PetscErrorCode ierr;
780 
781   PetscFunctionBegin;
782   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
783   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
/* Symbolic phase of LU factorization: a no-op for this type, since the factor
   matrix is fully created and allocated by the MatGetFactor routine. */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
793 
/*
   In-place Cholesky factorization of the lower triangle (p?potrf).
   The permutation perm is ignored.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record which solver package produced this factorization */
  ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
810 
811 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
812 {
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
817   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
/* Symbolic phase of Cholesky factorization: a no-op for this type, since the
   factor matrix is fully created and allocated by the MatGetFactor routine. */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
827 
/* Reports the solver package (MATSOLVERSCALAPACK) associated with a factored matrix */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
834 
835 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
836 {
837   Mat            B;
838   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* Create the factorization matrix */
843   ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr);
844   B->factortype = ftype;
845   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
846   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
847 
848   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
849   *F = B;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864 {
865   PetscErrorCode ierr;
866   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
867   PetscBLASInt   one=1,lwork=0;
868   const char     *ntype;
869   PetscScalar    *work=NULL,dummy;
870 
871   PetscFunctionBegin;
872   switch (type){
873     case NORM_1:
874       ntype = "1";
875       lwork = PetscMax(a->locr,a->locc);
876       break;
877     case NORM_FROBENIUS:
878       ntype = "F";
879       work  = &dummy;
880       break;
881     case NORM_INFINITY:
882       ntype = "I";
883       lwork = PetscMax(a->locr,a->locc);
884       break;
885     default:
886       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887   }
888   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
889   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
891   PetscFunctionReturn(0);
892 }
893 
894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895 {
896   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
/*
   Builds index sets holding the global row/column indices owned by this
   process in the 2d block-cyclic distribution. Either output argument may
   be NULL if not wanted; the caller receives ownership of the index sets.
*/
static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

  PetscFunctionBegin;
  if (rows) {
    n     = a->locr;          /* number of locally owned rows */
    nb    = a->mb;            /* row block size */
    isrc  = a->rsrc;          /* process row owning the first matrix row */
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
    /* local index i -> global index under the block-cyclic layout:
       full cycles skipped + offset within block + offset of this process' blocks */
    for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
    ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
  }
  if (cols) {
    n     = a->locc;          /* number of locally owned columns */
    nb    = a->nb;            /* column block size */
    isrc  = a->csrc;          /* process column owning the first matrix column */
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
    /* same local-to-global map along the column dimension */
    for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
    ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
933 
/*
   Converts a MATSCALAPACK matrix (2d block-cyclic) to MATDENSE (1d row
   distribution) using p?gemr2d for the redistribution. When reusing a B
   whose row layout differs from A's, the data is first redistributed into
   an auxiliary dense matrix and then copied row by row into B.
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* the auxiliary matrix uses the equal ownership split that gemr2d expects */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1025 
/*
   Converts a MATDENSE matrix (1d row distribution) to MATSCALAPACK
   (2d block-cyclic) using p?gemr2d for the redistribution.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    /* create the target with the equal ownership split MATSCALAPACK enforces */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1074 
1075 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1076 {
1077   Mat               mat_scal;
1078   PetscErrorCode    ierr;
1079   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1080   const PetscInt    *cols;
1081   const PetscScalar *vals;
1082 
1083   PetscFunctionBegin;
1084   if (reuse == MAT_REUSE_MATRIX) {
1085     mat_scal = *newmat;
1086     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1087   } else {
1088     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1089     m = PETSC_DECIDE;
1090     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1091     n = PETSC_DECIDE;
1092     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1093     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1094     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1095     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1096   }
1097   for (row=rstart;row<rend;row++) {
1098     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1099     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1100     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1101   }
1102   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1103   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1104 
1105   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1106   else *newmat = mat_scal;
1107   PetscFunctionReturn(0);
1108 }
1109 
/*
   Converts a symmetric-storage SBAIJ matrix to MATSCALAPACK, which stores
   all entries explicitly: for every stored upper-triangular entry the
   mirrored lower-triangular entry is also inserted (conjugated when the
   matrix is marked Hermitian).
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    /* create the target with the equal ownership split MATSCALAPACK enforces */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;  /* diagonal entries are inserted only once */
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1152 
1153 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1154 {
1155   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1156   PetscErrorCode ierr;
1157   PetscInt       sz=0;
1158 
1159   PetscFunctionBegin;
1160   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1161   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1162   if (!a->lld) a->lld = a->locr;
1163 
1164   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1165   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1166   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1167   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1168 
1169   A->preallocated = PETSC_TRUE;
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1174 {
1175   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1176   PetscErrorCode     ierr;
1177   Mat_ScaLAPACK_Grid *grid;
1178   PetscBool          flg;
1179   MPI_Comm           icomm;
1180 
1181   PetscFunctionBegin;
1182   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1183   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1184   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1185   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1186   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1187   if (--grid->grid_refct == 0) {
1188     Cblacs_gridexit(grid->ictxt);
1189     Cblacs_gridexit(grid->ictxrow);
1190     Cblacs_gridexit(grid->ictxcol);
1191     ierr = PetscFree(grid);CHKERRQ(ierr);
1192     ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
1193   }
1194   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1199   ierr = PetscFree(A->data);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
/*
   Verifies that a layout is compatible with the equal-local-size
   distribution assumed by MATSCALAPACK: every process except possibly the
   last must own the same number of entries (trailing empty processes are
   tolerated).
*/
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr);
  if (size>2) {
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];  /* local size of the first process, used as reference */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* a mismatch before the last process is accepted only when it is a run of empty processes */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1220 
/*
   Sets up a MATSCALAPACK matrix: validates the layout, computes the local
   sizes of the 2d block-cyclic distribution via numroc, allocates the local
   array, and initializes the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1250 
/*
   Begins assembly: starts scattering stashed off-process entries toward
   their owning processes; MatAssemblyEnd_ScaLAPACK() receives and stores
   them. A no-op when the user guarantees there are no off-process entries.
*/
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1263 
1264 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1265 {
1266   PetscErrorCode ierr;
1267   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1268   PetscMPIInt    n;
1269   PetscInt       i,flg,*row,*col;
1270   PetscScalar    *val;
1271   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1272 
1273   PetscFunctionBegin;
1274   if (A->nooffprocentries) PetscFunctionReturn(0);
1275   while (1) {
1276     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1277     if (!flg) break;
1278     for (i=0;i<n;i++) {
1279       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1280       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1281       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1282       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1283       switch (A->insertmode) {
1284         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1285         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1286         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1287       }
1288     }
1289   }
1290   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1295 {
1296   PetscErrorCode ierr;
1297   Mat            Adense,As;
1298   MPI_Comm       comm;
1299 
1300   PetscFunctionBegin;
1301   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1302   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1303   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1304   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1305   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1306   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1307   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 /* -------------------------------------------------------------------*/
1312 static struct _MatOps MatOps_Values = {
1313        MatSetValues_ScaLAPACK,
1314        0,
1315        0,
1316        MatMult_ScaLAPACK,
1317 /* 4*/ MatMultAdd_ScaLAPACK,
1318        MatMultTranspose_ScaLAPACK,
1319        MatMultTransposeAdd_ScaLAPACK,
1320        MatSolve_ScaLAPACK,
1321        MatSolveAdd_ScaLAPACK,
1322        0,
1323 /*10*/ 0,
1324        MatLUFactor_ScaLAPACK,
1325        MatCholeskyFactor_ScaLAPACK,
1326        0,
1327        MatTranspose_ScaLAPACK,
1328 /*15*/ MatGetInfo_ScaLAPACK,
1329        0,
1330        MatGetDiagonal_ScaLAPACK,
1331        MatDiagonalScale_ScaLAPACK,
1332        MatNorm_ScaLAPACK,
1333 /*20*/ MatAssemblyBegin_ScaLAPACK,
1334        MatAssemblyEnd_ScaLAPACK,
1335        MatSetOption_ScaLAPACK,
1336        MatZeroEntries_ScaLAPACK,
1337 /*24*/ 0,
1338        MatLUFactorSymbolic_ScaLAPACK,
1339        MatLUFactorNumeric_ScaLAPACK,
1340        MatCholeskyFactorSymbolic_ScaLAPACK,
1341        MatCholeskyFactorNumeric_ScaLAPACK,
1342 /*29*/ MatSetUp_ScaLAPACK,
1343        0,
1344        0,
1345        0,
1346        0,
1347 /*34*/ MatDuplicate_ScaLAPACK,
1348        0,
1349        0,
1350        0,
1351        0,
1352 /*39*/ MatAXPY_ScaLAPACK,
1353        0,
1354        0,
1355        0,
1356        MatCopy_ScaLAPACK,
1357 /*44*/ 0,
1358        MatScale_ScaLAPACK,
1359        MatShift_ScaLAPACK,
1360        0,
1361        0,
1362 /*49*/ 0,
1363        0,
1364        0,
1365        0,
1366        0,
1367 /*54*/ 0,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*59*/ 0,
1373        MatDestroy_ScaLAPACK,
1374        MatView_ScaLAPACK,
1375        0,
1376        0,
1377 /*64*/ 0,
1378        0,
1379        0,
1380        0,
1381        0,
1382 /*69*/ 0,
1383        0,
1384        MatConvert_ScaLAPACK_Dense,
1385        0,
1386        0,
1387 /*74*/ 0,
1388        0,
1389        0,
1390        0,
1391        0,
1392 /*79*/ 0,
1393        0,
1394        0,
1395        0,
1396        MatLoad_ScaLAPACK,
1397 /*84*/ 0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*89*/ 0,
1403        0,
1404        MatMatMultNumeric_ScaLAPACK,
1405        0,
1406        0,
1407 /*94*/ 0,
1408        0,
1409        0,
1410        MatMatTransposeMultNumeric_ScaLAPACK,
1411        0,
1412 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1413        0,
1414        0,
1415        MatConjugate_ScaLAPACK,
1416        0,
1417 /*104*/0,
1418        0,
1419        0,
1420        0,
1421        0,
1422 /*109*/MatMatSolve_ScaLAPACK,
1423        0,
1424        0,
1425        0,
1426        MatMissingDiagonal_ScaLAPACK,
1427 /*114*/0,
1428        0,
1429        0,
1430        0,
1431        0,
1432 /*119*/0,
1433        MatHermitianTranspose_ScaLAPACK,
1434        0,
1435        0,
1436        0,
1437 /*124*/0,
1438        0,
1439        0,
1440        0,
1441        0,
1442 /*129*/0,
1443        0,
1444        0,
1445        0,
1446        0,
1447 /*134*/0,
1448        0,
1449        0,
1450        0,
1451        0,
1452        0,
1453 /*140*/0,
1454        0,
1455        0,
1456        0,
1457        0,
1458 /*145*/0,
1459        0,
1460        0
1461 };
1462 
1463 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1464 {
1465   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1466   PetscInt           size=stash->size,nsends;
1467   PetscErrorCode     ierr;
1468   PetscInt           count,*sindices,**rindices,i,j,l;
1469   PetscScalar        **rvalues,*svalues;
1470   MPI_Comm           comm = stash->comm;
1471   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1472   PetscMPIInt        *sizes,*nlengths,nreceives;
1473   PetscInt           *sp_idx,*sp_idy;
1474   PetscScalar        *sp_val;
1475   PetscMatStashSpace space,space_next;
1476   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1477   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1478 
1479   PetscFunctionBegin;
1480   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1481     InsertMode addv;
1482     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1483     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1484     mat->insertmode = addv; /* in case this processor had no cache */
1485   }
1486 
1487   bs2 = stash->bs*stash->bs;
1488 
1489   /*  first count number of contributors to each processor */
1490   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1491   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1492 
1493   i     = j    = 0;
1494   space = stash->space_head;
1495   while (space) {
1496     space_next = space->next;
1497     for (l=0; l<space->local_used; l++) {
1498       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1499       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1500       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1501       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1502       nlengths[j]++; owner[i] = j;
1503       i++;
1504     }
1505     space = space_next;
1506   }
1507 
1508   /* Now check what procs get messages - and compute nsends. */
1509   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1510   for (i=0, nsends=0; i<size; i++) {
1511     if (nlengths[i]) {
1512       sizes[i] = 1; nsends++;
1513     }
1514   }
1515 
1516   {PetscMPIInt *onodes,*olengths;
1517    /* Determine the number of messages to expect, their lengths, from from-ids */
1518    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1519    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1520    /* since clubbing row,col - lengths are multiplied by 2 */
1521    for (i=0; i<nreceives; i++) olengths[i] *=2;
1522    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1523    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1524    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1525    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1526    ierr = PetscFree(onodes);CHKERRQ(ierr);
1527    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1528 
1529   /* do sends:
1530       1) starts[i] gives the starting index in svalues for stuff going to
1531          the ith processor
1532   */
1533   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1534   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1535   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1536   /* use 2 sends the first with all_a, the next with all_i and all_j */
1537   startv[0] = 0; starti[0] = 0;
1538   for (i=1; i<size; i++) {
1539     startv[i] = startv[i-1] + nlengths[i-1];
1540     starti[i] = starti[i-1] + 2*nlengths[i-1];
1541   }
1542 
1543   i     = 0;
1544   space = stash->space_head;
1545   while (space) {
1546     space_next = space->next;
1547     sp_idx     = space->idx;
1548     sp_idy     = space->idy;
1549     sp_val     = space->val;
1550     for (l=0; l<space->local_used; l++) {
1551       j = owner[i];
1552       if (bs2 == 1) {
1553         svalues[startv[j]] = sp_val[l];
1554       } else {
1555         PetscInt    k;
1556         PetscScalar *buf1,*buf2;
1557         buf1 = svalues+bs2*startv[j];
1558         buf2 = space->val + bs2*l;
1559         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1560       }
1561       sindices[starti[j]]             = sp_idx[l];
1562       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1563       startv[j]++;
1564       starti[j]++;
1565       i++;
1566     }
1567     space = space_next;
1568   }
1569   startv[0] = 0;
1570   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1571 
1572   for (i=0,count=0; i<size; i++) {
1573     if (sizes[i]) {
1574       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
1575       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
1576     }
1577   }
1578 #if defined(PETSC_USE_INFO)
1579   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1580   for (i=0; i<size; i++) {
1581     if (sizes[i]) {
1582       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1583     }
1584   }
1585 #endif
1586   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1587   ierr = PetscFree(owner);CHKERRQ(ierr);
1588   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1589   ierr = PetscFree(sizes);CHKERRQ(ierr);
1590 
1591   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1592   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1593 
1594   for (i=0; i<nreceives; i++) {
1595     recv_waits[2*i]   = recv_waits1[i];
1596     recv_waits[2*i+1] = recv_waits2[i];
1597   }
1598   stash->recv_waits = recv_waits;
1599 
1600   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1601   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1602 
1603   stash->svalues         = svalues;
1604   stash->sindices        = sindices;
1605   stash->rvalues         = rvalues;
1606   stash->rindices        = rindices;
1607   stash->send_waits      = send_waits;
1608   stash->nsends          = nsends;
1609   stash->nrecvs          = nreceives;
1610   stash->reproduce_count = 0;
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615 {
1616   PetscErrorCode ierr;
1617   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1618 
1619   PetscFunctionBegin;
1620   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1621   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1622   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1623   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1624   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 /*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix
1631 
1632    Logically Collective on A
1633 
   Input Parameters:
1635 +  A  - a MATSCALAPACK matrix
1636 .  mb - the row block size
1637 -  nb - the column block size
1638 
1639    Level: intermediate
1640 
1641 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1642 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* mb and nb must be the same value on every process of A's communicator */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the composed type-specific implementation; silently a no-op if A is not MATSCALAPACK */
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1654 
1655 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1656 {
1657   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1658 
1659   PetscFunctionBegin;
1660   if (mb) *mb = a->mb;
1661   if (nb) *nb = a->nb;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 /*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix
1668 
1669    Not collective
1670 
1671    Input Parameter:
1672 .  A  - a MATSCALAPACK matrix
1673 
1674    Output Parameters:
1675 +  mb - the row block size
1676 -  nb - the column block size
1677 
1678    Level: intermediate
1679 
1680 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1681 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the composed implementation; unlike PetscTryMethod this errors if A is not MATSCALAPACK */
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1691 
1692 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1693 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1694 
1695 /*MC
1696    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1697 
1698    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1699 
1700    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1701 
1702    Options Database Keys:
1703 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1704 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1705 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1706 
1707    Level: beginner
1708 
1709 .seealso: MATDENSE, MATELEMENTAL
1710 M*/
1711 
1712 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1713 {
1714   Mat_ScaLAPACK      *a;
1715   PetscErrorCode     ierr;
1716   PetscBool          flg,flg1;
1717   Mat_ScaLAPACK_Grid *grid;
1718   MPI_Comm           icomm;
1719   PetscBLASInt       nprow,npcol,myrow,mycol;
1720   PetscInt           optv1,k=2,array[2]={0,0};
1721   PetscMPIInt        size;
1722 
1723   PetscFunctionBegin;
1724   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1725   A->insertmode = NOT_SET_VALUES;
1726 
1727   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1728   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1729   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1730   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1731   A->stash.ScatterDestroy = NULL;
1732 
1733   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1734   A->data = (void*)a;
1735 
1736   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1737   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1738     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
1739     ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
1740   }
1741   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1742   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1743   if (!flg) {
1744     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1745 
1746     ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
1747     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1748 
1749     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1750     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1751     if (flg1) {
1752       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1753       grid->nprow = optv1;
1754     }
1755     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1756 
1757     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1758     grid->npcol = size/grid->nprow;
1759     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1760     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1761     grid->ictxt = Csys2blacs_handle(icomm);
1762     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1763     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1764     grid->grid_refct = 1;
1765     grid->nprow      = nprow;
1766     grid->npcol      = npcol;
1767     grid->myrow      = myrow;
1768     grid->mycol      = mycol;
1769     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1770     grid->ictxrow = Csys2blacs_handle(icomm);
1771     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1772     grid->ictxcol = Csys2blacs_handle(icomm);
1773     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1774     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);
1775 
1776   } else grid->grid_refct++;
1777   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1778   a->grid = grid;
1779   a->mb   = DEFAULT_BLOCKSIZE;
1780   a->nb   = DEFAULT_BLOCKSIZE;
1781 
1782   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1783   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1784   if (flg) {
1785     a->mb = array[0];
1786     a->nb = (k>1)? array[1]: a->mb;
1787   }
1788   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1789 
1790   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1793   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 /*@C
1798    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1799    (2D block cyclic distribution).
1800 
1801    Collective
1802 
1803    Input Parameters:
1804 +  comm - MPI communicator
1805 .  mb   - row block size (or PETSC_DECIDE to have it set)
1806 .  nb   - column block size (or PETSC_DECIDE to have it set)
1807 .  M    - number of global rows
1808 .  N    - number of global columns
1809 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1810 -  csrc - coordinate of process that owns the first column of the distributed matrix
1811 
1812    Output Parameter:
1813 .  A - the matrix
1814 
1815    Options Database Keys:
1816 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1817 
1818    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1819    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1820    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1821 
1822    Notes:
1823    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1824    is chosen.
1825 
1826    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1828    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1829    significance and are thus ignored. The block sizes refer to the values
1830    used for the distributed matrix, not the same meaning as in BAIJ.
1831 
1832    Level: intermediate
1833 
1834 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1835 @*/
1836 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1837 {
1838   PetscErrorCode ierr;
1839   Mat_ScaLAPACK  *a;
1840   PetscInt       m,n;
1841 
1842   PetscFunctionBegin;
1843   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1844   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1845   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1846   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1847   m = PETSC_DECIDE;
1848   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1849   n = PETSC_DECIDE;
1850   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1851   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1852   a = (Mat_ScaLAPACK*)(*A)->data;
1853   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1854   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1855   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1856   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1857   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1858   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1859   ierr = MatSetUp(*A);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863