xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision d24d420451aab1d024a728b511d7b61b40627436)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12 {
13   PetscErrorCode    ierr;
14   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
15   PetscBool         iascii;
16   PetscViewerFormat format;
17   Mat               Adense;
18 
19   PetscFunctionBegin;
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21   if (iascii) {
22     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28       PetscFunctionReturn(0);
29     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30       PetscFunctionReturn(0);
31     }
32   }
33   /* convert to dense format and call MatView() */
34   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37   PetscFunctionReturn(0);
38 }
39 
40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41 {
42   PetscErrorCode ierr;
43   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
44   PetscLogDouble isend[2],irecv[2];
45 
46   PetscFunctionBegin;
47   info->block_size = 1.0;
48 
49   isend[0] = a->lld*a->locc;     /* locally allocated */
50   isend[1] = a->locr*a->locc;    /* used submatrix */
51   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52     info->nz_allocated   = isend[0];
53     info->nz_used        = isend[1];
54   } else if (flag == MAT_GLOBAL_MAX) {
55     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56     info->nz_allocated   = irecv[0];
57     info->nz_used        = irecv[1];
58   } else if (flag == MAT_GLOBAL_SUM) {
59     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60     info->nz_allocated   = irecv[0];
61     info->nz_used        = irecv[1];
62   }
63 
64   info->nz_unneeded       = 0;
65   info->assemblies        = A->num_ass;
66   info->mallocs           = 0;
67   info->memory            = ((PetscObject)A)->mem;
68   info->fill_ratio_given  = 0;
69   info->fill_ratio_needed = 0;
70   info->factor_mallocs    = 0;
71   PetscFunctionReturn(0);
72 }
73 
74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75 {
76   PetscFunctionBegin;
77   switch (op) {
78     case MAT_NEW_NONZERO_LOCATIONS:
79     case MAT_NEW_NONZERO_LOCATION_ERR:
80     case MAT_NEW_NONZERO_ALLOCATION_ERR:
81     case MAT_SYMMETRIC:
82     case MAT_SORTED_FULL:
83     case MAT_HERMITIAN:
84       break;
85     default:
86       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
92 {
93   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j;
96   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
97 
98   PetscFunctionBegin;
99   for (i=0;i<nr;i++) {
100     if (rows[i] < 0) continue;
101     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
102     for (j=0;j<nc;j++) {
103       if (cols[j] < 0) continue;
104       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
105       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
106       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
107         switch (imode) {
108           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
109           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
110           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
111         }
112       } else {
113         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
114         A->assembled = PETSC_FALSE;
115         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
116       }
117     }
118   }
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
123 {
124   PetscErrorCode ierr;
125   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
126   PetscScalar    *x2d,*y2d,alpha=1.0;
127   const PetscInt *ranges;
128   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
129 
130   PetscFunctionBegin;
131   if (transpose) {
132 
133     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
134     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
135     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
136     xlld = PetscMax(1,A->rmap->n);
137     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
138     PetscCheckScaLapackInfo("descinit",info);
139     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
140     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
141     ylld = 1;
142     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
143     PetscCheckScaLapackInfo("descinit",info);
144 
145     /* allocate 2d vectors */
146     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
147     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
148     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
149     xlld = PetscMax(1,lszx);
150 
151     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
152     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
153     PetscCheckScaLapackInfo("descinit",info);
154     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
155     PetscCheckScaLapackInfo("descinit",info);
156 
157     /* redistribute x as a column of a 2d matrix */
158     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
159 
160     /* redistribute y as a row of a 2d matrix */
161     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
162 
163     /* call PBLAS subroutine */
164     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
165 
166     /* redistribute y from a row of a 2d matrix */
167     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
168 
169   } else {   /* non-transpose */
170 
171     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
172     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
173     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
174     xlld = 1;
175     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
176     PetscCheckScaLapackInfo("descinit",info);
177     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
178     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
179     ylld = PetscMax(1,A->rmap->n);
180     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
181     PetscCheckScaLapackInfo("descinit",info);
182 
183     /* allocate 2d vectors */
184     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
185     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
186     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
187     ylld = PetscMax(1,lszy);
188 
189     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
190     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
191     PetscCheckScaLapackInfo("descinit",info);
192     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
193     PetscCheckScaLapackInfo("descinit",info);
194 
195     /* redistribute x as a row of a 2d matrix */
196     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
197 
198     /* redistribute y as a column of a 2d matrix */
199     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
200 
201     /* call PBLAS subroutine */
202     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
203 
204     /* redistribute y from a column of a 2d matrix */
205     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
206 
207   }
208   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213 {
214   PetscErrorCode    ierr;
215   const PetscScalar *xarray;
216   PetscScalar       *yarray;
217 
218   PetscFunctionBegin;
219   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   PetscErrorCode    ierr;
230   const PetscScalar *xarray;
231   PetscScalar       *yarray;
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243 {
244   PetscErrorCode    ierr;
245   const PetscScalar *xarray;
246   PetscScalar       *zarray;
247 
248   PetscFunctionBegin;
249   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259 {
260   PetscErrorCode    ierr;
261   const PetscScalar *xarray;
262   PetscScalar       *zarray;
263 
264   PetscFunctionBegin;
265   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275 {
276   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279   PetscScalar   sone=1.0,zero=0.0;
280   PetscBLASInt  one=1;
281 
282   PetscFunctionBegin;
283   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284   C->assembled = PETSC_TRUE;
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289 {
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
294   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
295   ierr = MatSetUp(C);CHKERRQ(ierr);
296   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297   PetscFunctionReturn(0);
298 }
299 
300 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301 {
302   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305   PetscScalar   sone=1.0,zero=0.0;
306   PetscBLASInt  one=1;
307 
308   PetscFunctionBegin;
309   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310   C->assembled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315 {
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321   ierr = MatSetUp(C);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327 {
328   PetscFunctionBegin;
329   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330   C->ops->productsymbolic = MatProductSymbolic_AB;
331   PetscFunctionReturn(0);
332 }
333 
334 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335 {
336   PetscFunctionBegin;
337   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338   C->ops->productsymbolic          = MatProductSymbolic_ABt;
339   PetscFunctionReturn(0);
340 }
341 
342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343 {
344   PetscErrorCode ierr;
345   Mat_Product    *product = C->product;
346 
347   PetscFunctionBegin;
348   switch (product->type) {
349     case MATPRODUCT_AB:
350       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
351       break;
352     case MATPRODUCT_ABt:
353       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
354       break;
355     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356   }
357   PetscFunctionReturn(0);
358 }
359 /* --------------------------------------- */
360 
361 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
362 {
363   PetscErrorCode    ierr;
364   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
365   PetscScalar       *darray,*d2d,v;
366   const PetscInt    *ranges;
367   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
368 
369   PetscFunctionBegin;
370   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
371 
372   if (A->rmap->N<=A->cmap->N) {   /* row version */
373 
374     /* create ScaLAPACK descriptor for vector (1d block distribution) */
375     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
376     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
377     dlld = PetscMax(1,A->rmap->n);
378     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
379     PetscCheckScaLapackInfo("descinit",info);
380 
381     /* allocate 2d vector */
382     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
384     dlld = PetscMax(1,lszd);
385 
386     /* create ScaLAPACK descriptor for vector (2d block distribution) */
387     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
388     PetscCheckScaLapackInfo("descinit",info);
389 
390     /* collect diagonal */
391     for (j=1;j<=a->M;j++) {
392       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394     }
395 
396     /* redistribute d from a column of a 2d matrix */
397     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398     ierr = PetscFree(d2d);CHKERRQ(ierr);
399 
400   } else {   /* column version */
401 
402     /* create ScaLAPACK descriptor for vector (1d block distribution) */
403     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
404     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
405     dlld = 1;
406     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
407     PetscCheckScaLapackInfo("descinit",info);
408 
409     /* allocate 2d vector */
410     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
412 
413     /* create ScaLAPACK descriptor for vector (2d block distribution) */
414     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
415     PetscCheckScaLapackInfo("descinit",info);
416 
417     /* collect diagonal */
418     for (j=1;j<=a->N;j++) {
419       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421     }
422 
423     /* redistribute d from a row of a 2d matrix */
424     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425     ierr = PetscFree(d2d);CHKERRQ(ierr);
426   }
427 
428   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
429   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
430   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435 {
436   PetscErrorCode    ierr;
437   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
438   const PetscScalar *d;
439   const PetscInt    *ranges;
440   PetscScalar       *d2d;
441   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
442 
443   PetscFunctionBegin;
444   if (R) {
445     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
446     /* create ScaLAPACK descriptor for vector (1d block distribution) */
447     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
448     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
449     dlld = 1;
450     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
451     PetscCheckScaLapackInfo("descinit",info);
452 
453     /* allocate 2d vector */
454     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
455     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
456 
457     /* create ScaLAPACK descriptor for vector (2d block distribution) */
458     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
459     PetscCheckScaLapackInfo("descinit",info);
460 
461     /* redistribute d to a row of a 2d matrix */
462     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
463 
464     /* broadcast along process columns */
465     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
466     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
467 
468     /* local scaling */
469     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
470 
471     ierr = PetscFree(d2d);CHKERRQ(ierr);
472     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
473   }
474   if (L) {
475     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
476     /* create ScaLAPACK descriptor for vector (1d block distribution) */
477     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
478     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
479     dlld = PetscMax(1,A->rmap->n);
480     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
481     PetscCheckScaLapackInfo("descinit",info);
482 
483     /* allocate 2d vector */
484     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
485     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
486     dlld = PetscMax(1,lszd);
487 
488     /* create ScaLAPACK descriptor for vector (2d block distribution) */
489     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
490     PetscCheckScaLapackInfo("descinit",info);
491 
492     /* redistribute d to a column of a 2d matrix */
493     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
494 
495     /* broadcast along process rows */
496     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
497     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
498 
499     /* local scaling */
500     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
501 
502     ierr = PetscFree(d2d);CHKERRQ(ierr);
503     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509 {
510   PetscFunctionBegin;
511   *missing = PETSC_FALSE;
512   PetscFunctionReturn(0);
513 }
514 
515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516 {
517   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518   PetscBLASInt  n,one=1;
519 
520   PetscFunctionBegin;
521   n = x->lld*x->locc;
522   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523   PetscFunctionReturn(0);
524 }
525 
526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527 {
528   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529   PetscBLASInt  i,n;
530   PetscScalar   v;
531 
532   PetscFunctionBegin;
533   n = PetscMin(x->M,x->N);
534   for (i=1;i<=n;i++) {
535     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536     v += alpha;
537     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543 {
544   PetscErrorCode ierr;
545   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
546   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
547   PetscBLASInt   one=1;
548   PetscScalar    beta=1.0;
549 
550   PetscFunctionBegin;
551   MatScaLAPACKCheckDistribution(Y,1,X,3);
552   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558 {
559   PetscErrorCode ierr;
560   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
562 
563   PetscFunctionBegin;
564   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570 {
571   Mat            Bs;
572   MPI_Comm       comm;
573   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581   b = (Mat_ScaLAPACK*)Bs->data;
582   b->M    = a->M;
583   b->N    = a->N;
584   b->mb   = a->mb;
585   b->nb   = a->nb;
586   b->rsrc = a->rsrc;
587   b->csrc = a->csrc;
588   ierr = MatSetUp(Bs);CHKERRQ(ierr);
589   *B = Bs;
590   if (op == MAT_COPY_VALUES) {
591     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592   }
593   Bs->assembled = PETSC_TRUE;
594   PetscFunctionReturn(0);
595 }
596 
597 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
598 {
599   PetscErrorCode ierr;
600   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
601   Mat            Bs = *B;
602   PetscBLASInt   one=1;
603   PetscScalar    sone=1.0,zero=0.0;
604 #if defined(PETSC_USE_COMPLEX)
605   PetscInt       i,j;
606 #endif
607 
608   PetscFunctionBegin;
609   if (reuse == MAT_INITIAL_MATRIX) {
610     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
611     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
612     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
613     ierr = MatSetUp(Bs);CHKERRQ(ierr);
614     *B = Bs;
615   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
616   b = (Mat_ScaLAPACK*)Bs->data;
617   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
618 #if defined(PETSC_USE_COMPLEX)
619   /* undo conjugation */
620   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
621 #endif
622   Bs->assembled = PETSC_TRUE;
623   PetscFunctionReturn(0);
624 }
625 
626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627 {
628   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629   PetscInt      i,j;
630 
631   PetscFunctionBegin;
632   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637 {
638   PetscErrorCode ierr;
639   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
640   Mat            Bs = *B;
641   PetscBLASInt   one=1;
642   PetscScalar    sone=1.0,zero=0.0;
643 
644   PetscFunctionBegin;
645   if (reuse == MAT_INITIAL_MATRIX) {
646     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
647     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
648     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
649     ierr = MatSetUp(Bs);CHKERRQ(ierr);
650     *B = Bs;
651   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652   b = (Mat_ScaLAPACK*)Bs->data;
653   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654   Bs->assembled = PETSC_TRUE;
655   PetscFunctionReturn(0);
656 }
657 
658 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
659 {
660   PetscErrorCode ierr;
661   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
662   PetscScalar    *x,*x2d;
663   const PetscInt *ranges;
664   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
665 
666   PetscFunctionBegin;
667   ierr = VecCopy(B,X);CHKERRQ(ierr);
668   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
669 
670   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
671   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
672   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
673   xlld = PetscMax(1,A->rmap->n);
674   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
675   PetscCheckScaLapackInfo("descinit",info);
676 
677   /* allocate 2d vector */
678   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
679   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
680   xlld = PetscMax(1,lszx);
681 
682   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
683   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
684   PetscCheckScaLapackInfo("descinit",info);
685 
686   /* redistribute x as a column of a 2d matrix */
687   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
688 
689   /* call ScaLAPACK subroutine */
690   switch (A->factortype) {
691     case MAT_FACTOR_LU:
692       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
693       PetscCheckScaLapackInfo("getrs",info);
694       break;
695     case MAT_FACTOR_CHOLESKY:
696       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
697       PetscCheckScaLapackInfo("potrs",info);
698       break;
699     default:
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
701       break;
702   }
703 
704   /* redistribute x from a column of a 2d matrix */
705   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
706 
707   ierr = PetscFree(x2d);CHKERRQ(ierr);
708   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713 {
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
718   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
723 {
724   PetscErrorCode ierr;
725   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
726   PetscBool      flg1,flg2;
727   PetscBLASInt   one=1,info;
728 
729   PetscFunctionBegin;
730   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
731   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
732   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
733   MatScaLAPACKCheckDistribution(B,1,X,2);
734   b = (Mat_ScaLAPACK*)B->data;
735   x = (Mat_ScaLAPACK*)X->data;
736   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
737 
738   switch (A->factortype) {
739     case MAT_FACTOR_LU:
740       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
741       PetscCheckScaLapackInfo("getrs",info);
742       break;
743     case MAT_FACTOR_CHOLESKY:
744       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
745       PetscCheckScaLapackInfo("potrs",info);
746       break;
747     default:
748       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
749       break;
750   }
751   PetscFunctionReturn(0);
752 }
753 
754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755 {
756   PetscErrorCode ierr;
757   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758   PetscBLASInt   one=1,info;
759 
760   PetscFunctionBegin;
761   if (!a->pivots) {
762     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
763     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
764   }
765   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766   PetscCheckScaLapackInfo("getrf",info);
767   A->factortype = MAT_FACTOR_LU;
768   A->assembled  = PETSC_TRUE;
769 
770   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
771   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776 {
777   PetscErrorCode ierr;
778 
779   PetscFunctionBegin;
780   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 
785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786 {
787   PetscFunctionBegin;
788   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789   PetscFunctionReturn(0);
790 }
791 
792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793 {
794   PetscErrorCode ierr;
795   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
796   PetscBLASInt   one=1,info;
797 
798   PetscFunctionBegin;
799   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
800   PetscCheckScaLapackInfo("potrf",info);
801   A->factortype = MAT_FACTOR_CHOLESKY;
802   A->assembled  = PETSC_TRUE;
803 
804   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
805   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810 {
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820 {
821   PetscFunctionBegin;
822   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
823   PetscFunctionReturn(0);
824 }
825 
826 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827 {
828   PetscFunctionBegin;
829   *type = MATSOLVERSCALAPACK;
830   PetscFunctionReturn(0);
831 }
832 
833 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834 {
835   Mat            B;
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   /* Create the factorization matrix */
840   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
841   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
842   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
843   ierr = MatSetUp(B);CHKERRQ(ierr);
844   B->factortype = ftype;
845   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
846   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
847 
848   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
849   *F = B;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864 {
865   PetscErrorCode ierr;
866   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
867   PetscBLASInt   one=1,lwork=0;
868   const char     *ntype;
869   PetscScalar    *work,dummy;
870 
871   PetscFunctionBegin;
872   switch (type){
873     case NORM_1:
874       ntype = "1";
875       lwork = PetscMax(a->locr,a->locc);
876       break;
877     case NORM_FROBENIUS:
878       ntype = "F";
879       work  = &dummy;
880       break;
881     case NORM_INFINITY:
882       ntype = "I";
883       lwork = PetscMax(a->locr,a->locc);
884       break;
885     default:
886       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887   }
888   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
889   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
891   PetscFunctionReturn(0);
892 }
893 
894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895 {
896   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905 {
906   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
909 
910   PetscFunctionBegin;
911   if (rows) {
912     n     = a->locr;
913     nb    = a->mb;
914     isrc  = a->rsrc;
915     nproc = a->grid->nprow;
916     iproc = a->grid->myrow;
917     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
918     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
920   }
921   if (cols) {
922     n     = a->locc;
923     nb    = a->nb;
924     isrc  = a->csrc;
925     nproc = a->grid->npcol;
926     iproc = a->grid->mycol;
927     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
935 {
936   PetscErrorCode ierr;
937   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
938   Mat            Bmpi;
939   MPI_Comm       comm;
940   const PetscInt *ranges;
941   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
942   PetscScalar    *barray;
943 
944   PetscFunctionBegin;
945   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
946 
947   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
948   else {
949     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
950     ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
951     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
952     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
953   }
954 
955   /* create ScaLAPACK descriptor for B (1d block distribution) */
956   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
957   ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
958   lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
959   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
960   PetscCheckScaLapackInfo("descinit",info);
961 
962   /* redistribute matrix */
963   ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
964   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
965   ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
966 
967   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969   if (reuse == MAT_INPLACE_MATRIX) {
970     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
971   } else *B = Bmpi;
972   PetscFunctionReturn(0);
973 }
974 
975 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
976 {
977   PetscErrorCode ierr;
978   Mat_ScaLAPACK  *b;
979   Mat            Bmpi;
980   MPI_Comm       comm;
981   const PetscInt *ranges;
982   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
983   PetscScalar    *aarray;
984 
985   PetscFunctionBegin;
986   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
987 
988   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
989   else {
990     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
991     ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
992     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
993     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
994   }
995   b = (Mat_ScaLAPACK*)Bmpi->data;
996 
997   /* create ScaLAPACK descriptor for A (1d block distribution) */
998   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
999   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
1000   lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1001   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1002   PetscCheckScaLapackInfo("descinit",info);
1003 
1004   /* redistribute matrix */
1005   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1006   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1007   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1008 
1009   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1011   if (reuse == MAT_INPLACE_MATRIX) {
1012     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1013   } else *B = Bmpi;
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1018 {
1019   Mat               mat_scal;
1020   PetscErrorCode    ierr;
1021   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1022   const PetscInt    *cols;
1023   const PetscScalar *vals;
1024 
1025   PetscFunctionBegin;
1026   if (reuse == MAT_REUSE_MATRIX) {
1027     mat_scal = *newmat;
1028     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1029   } else {
1030     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1031     m = PETSC_DECIDE;
1032     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1033     n = PETSC_DECIDE;
1034     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1035     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1036     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1037     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1038   }
1039   for (row=rstart;row<rend;row++) {
1040     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1041     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1042     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1043   }
1044   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1045   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1046 
1047   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1048   else *newmat = mat_scal;
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1053 {
1054   Mat               mat_scal;
1055   PetscErrorCode    ierr;
1056   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1057   const PetscInt    *cols;
1058   const PetscScalar *vals;
1059   PetscScalar       v;
1060 
1061   PetscFunctionBegin;
1062   if (reuse == MAT_REUSE_MATRIX) {
1063     mat_scal = *newmat;
1064     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1065   } else {
1066     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1067     m = PETSC_DECIDE;
1068     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1069     n = PETSC_DECIDE;
1070     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1071     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1072     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1073     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1074   }
1075   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1076   for (row=rstart;row<rend;row++) {
1077     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1078     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1079     for (j=0;j<ncols;j++) { /* lower triangular part */
1080       if (cols[j] == row) continue;
1081       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1082       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1083     }
1084     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1085   }
1086   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1087   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1088   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1089 
1090   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1091   else *newmat = mat_scal;
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1096 {
1097   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       sz=0;
1100 
1101   PetscFunctionBegin;
1102   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1103   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1104   if (!a->lld) a->lld = a->locr;
1105 
1106   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1107   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1108   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1109   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1110 
1111   A->preallocated = PETSC_TRUE;
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1116 {
1117   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1118   PetscErrorCode     ierr;
1119   Mat_ScaLAPACK_Grid *grid;
1120   PetscBool          flg;
1121   MPI_Comm           icomm;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1125   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1126   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1127   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1128   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1129   if (--grid->grid_refct == 0) {
1130     Cblacs_gridexit(grid->ictxt);
1131     Cblacs_gridexit(grid->ictxrow);
1132     Cblacs_gridexit(grid->ictxcol);
1133     ierr = PetscFree(grid);CHKERRQ(ierr);
1134     ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1135   }
1136   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscFree(A->data);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1146 {
1147   PetscErrorCode ierr;
1148   const PetscInt *ranges;
1149   PetscMPIInt    size;
1150   PetscInt       i,n;
1151 
1152   PetscFunctionBegin;
1153   ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
1154   if (size>2) {
1155     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1156     n = ranges[1]-ranges[0];
1157     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1158     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1159   }
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1164 {
1165   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1166   PetscErrorCode ierr;
1167   PetscBLASInt   info=0;
1168 
1169   PetscFunctionBegin;
1170   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1171   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1172 
1173   /* check that the layout is as enforced by MatCreateScaLAPACK */
1174   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1175   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1176 
1177   /* compute local sizes */
1178   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1179   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1180   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1181   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1182   a->lld  = PetscMax(1,a->locr);
1183 
1184   /* allocate local array */
1185   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1186 
1187   /* set up ScaLAPACK descriptor */
1188   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1189   PetscCheckScaLapackInfo("descinit",info);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1194 {
1195   PetscErrorCode ierr;
1196   PetscInt       nstash,reallocs;
1197 
1198   PetscFunctionBegin;
1199   if (A->nooffprocentries) PetscFunctionReturn(0);
1200   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1201   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1202   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1207 {
1208   PetscErrorCode ierr;
1209   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1210   PetscMPIInt    n;
1211   PetscInt       i,flg,*row,*col;
1212   PetscScalar    *val;
1213   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1214 
1215   PetscFunctionBegin;
1216   if (A->nooffprocentries) PetscFunctionReturn(0);
1217   while (1) {
1218     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1219     if (!flg) break;
1220     for (i=0;i<n;i++) {
1221       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1222       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1223       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1224       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1225       switch (A->insertmode) {
1226         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1227         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1228         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1229       }
1230     }
1231   }
1232   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1237 {
1238   PetscErrorCode ierr;
1239   Mat            Adense,As;
1240   MPI_Comm       comm;
1241 
1242   PetscFunctionBegin;
1243   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1244   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1245   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1246   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1247   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1248   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1249   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 /* -------------------------------------------------------------------*/
1254 static struct _MatOps MatOps_Values = {
1255        MatSetValues_ScaLAPACK,
1256        0,
1257        0,
1258        MatMult_ScaLAPACK,
1259 /* 4*/ MatMultAdd_ScaLAPACK,
1260        MatMultTranspose_ScaLAPACK,
1261        MatMultTransposeAdd_ScaLAPACK,
1262        MatSolve_ScaLAPACK,
1263        MatSolveAdd_ScaLAPACK,
1264        0,
1265 /*10*/ 0,
1266        MatLUFactor_ScaLAPACK,
1267        MatCholeskyFactor_ScaLAPACK,
1268        0,
1269        MatTranspose_ScaLAPACK,
1270 /*15*/ MatGetInfo_ScaLAPACK,
1271        0,
1272        MatGetDiagonal_ScaLAPACK,
1273        MatDiagonalScale_ScaLAPACK,
1274        MatNorm_ScaLAPACK,
1275 /*20*/ MatAssemblyBegin_ScaLAPACK,
1276        MatAssemblyEnd_ScaLAPACK,
1277        MatSetOption_ScaLAPACK,
1278        MatZeroEntries_ScaLAPACK,
1279 /*24*/ 0,
1280        MatLUFactorSymbolic_ScaLAPACK,
1281        MatLUFactorNumeric_ScaLAPACK,
1282        MatCholeskyFactorSymbolic_ScaLAPACK,
1283        MatCholeskyFactorNumeric_ScaLAPACK,
1284 /*29*/ MatSetUp_ScaLAPACK,
1285        0,
1286        0,
1287        0,
1288        0,
1289 /*34*/ MatDuplicate_ScaLAPACK,
1290        0,
1291        0,
1292        0,
1293        0,
1294 /*39*/ MatAXPY_ScaLAPACK,
1295        0,
1296        0,
1297        0,
1298        MatCopy_ScaLAPACK,
1299 /*44*/ 0,
1300        MatScale_ScaLAPACK,
1301        MatShift_ScaLAPACK,
1302        0,
1303        0,
1304 /*49*/ 0,
1305        0,
1306        0,
1307        0,
1308        0,
1309 /*54*/ 0,
1310        0,
1311        0,
1312        0,
1313        0,
1314 /*59*/ 0,
1315        MatDestroy_ScaLAPACK,
1316        MatView_ScaLAPACK,
1317        0,
1318        0,
1319 /*64*/ 0,
1320        0,
1321        0,
1322        0,
1323        0,
1324 /*69*/ 0,
1325        0,
1326        MatConvert_ScaLAPACK_Dense,
1327        0,
1328        0,
1329 /*74*/ 0,
1330        0,
1331        0,
1332        0,
1333        0,
1334 /*79*/ 0,
1335        0,
1336        0,
1337        0,
1338        MatLoad_ScaLAPACK,
1339 /*84*/ 0,
1340        0,
1341        0,
1342        0,
1343        0,
1344 /*89*/ 0,
1345        0,
1346        MatMatMultNumeric_ScaLAPACK,
1347        0,
1348        0,
1349 /*94*/ 0,
1350        0,
1351        0,
1352        MatMatTransposeMultNumeric_ScaLAPACK,
1353        0,
1354 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1355        0,
1356        0,
1357        MatConjugate_ScaLAPACK,
1358        0,
1359 /*104*/0,
1360        0,
1361        0,
1362        0,
1363        0,
1364 /*109*/MatMatSolve_ScaLAPACK,
1365        0,
1366        0,
1367        0,
1368        MatMissingDiagonal_ScaLAPACK,
1369 /*114*/0,
1370        0,
1371        0,
1372        0,
1373        0,
1374 /*119*/0,
1375        MatHermitianTranspose_ScaLAPACK,
1376        0,
1377        0,
1378        0,
1379 /*124*/0,
1380        0,
1381        0,
1382        0,
1383        0,
1384 /*129*/0,
1385        0,
1386        0,
1387        0,
1388        0,
1389 /*134*/0,
1390        0,
1391        0,
1392        0,
1393        0,
1394        0,
1395 /*140*/0,
1396        0,
1397        0,
1398        0,
1399        0,
1400 /*145*/0,
1401        0,
1402        0
1403 };
1404 
1405 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1406 {
1407   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1408   PetscInt           size=stash->size,nsends;
1409   PetscErrorCode     ierr;
1410   PetscInt           count,*sindices,**rindices,i,j,l;
1411   PetscScalar        **rvalues,*svalues;
1412   MPI_Comm           comm = stash->comm;
1413   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1414   PetscMPIInt        *sizes,*nlengths,nreceives;
1415   PetscInt           *sp_idx,*sp_idy;
1416   PetscScalar        *sp_val;
1417   PetscMatStashSpace space,space_next;
1418   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1419   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1420 
1421   PetscFunctionBegin;
1422   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1423     InsertMode addv;
1424     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1426     mat->insertmode = addv; /* in case this processor had no cache */
1427   }
1428 
1429   bs2 = stash->bs*stash->bs;
1430 
1431   /*  first count number of contributors to each processor */
1432   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1433   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1434 
1435   i     = j    = 0;
1436   space = stash->space_head;
1437   while (space) {
1438     space_next = space->next;
1439     for (l=0; l<space->local_used; l++) {
1440       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1441       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1442       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1443       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1444       nlengths[j]++; owner[i] = j;
1445       i++;
1446     }
1447     space = space_next;
1448   }
1449 
1450   /* Now check what procs get messages - and compute nsends. */
1451   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1452   for (i=0, nsends=0; i<size; i++) {
1453     if (nlengths[i]) {
1454       sizes[i] = 1; nsends++;
1455     }
1456   }
1457 
1458   {PetscMPIInt *onodes,*olengths;
1459    /* Determine the number of messages to expect, their lengths, from from-ids */
1460    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1461    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1462    /* since clubbing row,col - lengths are multiplied by 2 */
1463    for (i=0; i<nreceives; i++) olengths[i] *=2;
1464    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1465    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1466    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1467    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1468    ierr = PetscFree(onodes);CHKERRQ(ierr);
1469    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1470 
1471   /* do sends:
1472       1) starts[i] gives the starting index in svalues for stuff going to
1473          the ith processor
1474   */
1475   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1476   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1477   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1478   /* use 2 sends the first with all_a, the next with all_i and all_j */
1479   startv[0] = 0; starti[0] = 0;
1480   for (i=1; i<size; i++) {
1481     startv[i] = startv[i-1] + nlengths[i-1];
1482     starti[i] = starti[i-1] + 2*nlengths[i-1];
1483   }
1484 
1485   i     = 0;
1486   space = stash->space_head;
1487   while (space) {
1488     space_next = space->next;
1489     sp_idx     = space->idx;
1490     sp_idy     = space->idy;
1491     sp_val     = space->val;
1492     for (l=0; l<space->local_used; l++) {
1493       j = owner[i];
1494       if (bs2 == 1) {
1495         svalues[startv[j]] = sp_val[l];
1496       } else {
1497         PetscInt    k;
1498         PetscScalar *buf1,*buf2;
1499         buf1 = svalues+bs2*startv[j];
1500         buf2 = space->val + bs2*l;
1501         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1502       }
1503       sindices[starti[j]]             = sp_idx[l];
1504       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1505       startv[j]++;
1506       starti[j]++;
1507       i++;
1508     }
1509     space = space_next;
1510   }
1511   startv[0] = 0;
1512   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1513 
1514   for (i=0,count=0; i<size; i++) {
1515     if (sizes[i]) {
1516       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1517       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1518     }
1519   }
1520 #if defined(PETSC_USE_INFO)
1521   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1522   for (i=0; i<size; i++) {
1523     if (sizes[i]) {
1524       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1525     }
1526   }
1527 #endif
1528   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1529   ierr = PetscFree(owner);CHKERRQ(ierr);
1530   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1531   ierr = PetscFree(sizes);CHKERRQ(ierr);
1532 
1533   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1534   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1535 
1536   for (i=0; i<nreceives; i++) {
1537     recv_waits[2*i]   = recv_waits1[i];
1538     recv_waits[2*i+1] = recv_waits2[i];
1539   }
1540   stash->recv_waits = recv_waits;
1541 
1542   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1543   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1544 
1545   stash->svalues         = svalues;
1546   stash->sindices        = sindices;
1547   stash->rvalues         = rvalues;
1548   stash->rindices        = rindices;
1549   stash->send_waits      = send_waits;
1550   stash->nsends          = nsends;
1551   stash->nrecvs          = nreceives;
1552   stash->reproduce_count = 0;
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1557 {
1558   PetscErrorCode ierr;
1559   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1560 
1561   PetscFunctionBegin;
1562   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1563   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1564   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1565   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1566   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /*@
1571    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1572    the ScaLAPACK matrix
1573 
1574    Logically Collective on A
1575 
1576    Input Parameter:
1577 +  A  - a MATSCALAPACK matrix
1578 .  mb - the row block size
1579 -  nb - the column block size
1580 
1581    Level: intermediate
1582 
1583 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1584 @*/
1585 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1586 {
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1591   PetscValidLogicalCollectiveInt(A,mb,2);
1592   PetscValidLogicalCollectiveInt(A,nb,3);
1593   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1598 {
1599   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1600 
1601   PetscFunctionBegin;
1602   if (mb) *mb = a->mb;
1603   if (nb) *nb = a->nb;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 /*@
1608    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1609    the ScaLAPACK matrix
1610 
1611    Not collective
1612 
1613    Input Parameter:
1614 .  A  - a MATSCALAPACK matrix
1615 
1616    Output Parameters:
1617 +  mb - the row block size
1618 -  nb - the column block size
1619 
1620    Level: intermediate
1621 
1622 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1623 @*/
1624 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1625 {
1626   PetscErrorCode ierr;
1627 
1628   PetscFunctionBegin;
1629   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1630   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1635 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1636 
1637 /*MC
1638    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1639 
1640    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1641 
1642    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1643 
1644    Options Database Keys:
1645 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1646 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1647 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1648 
1649    Level: beginner
1650 
1651 .seealso: MATDENSE, MATELEMENTAL
1652 M*/
1653 
1654 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1655 {
1656   Mat_ScaLAPACK      *a;
1657   PetscErrorCode     ierr;
1658   PetscBool          flg,flg1;
1659   Mat_ScaLAPACK_Grid *grid;
1660   MPI_Comm           icomm;
1661   PetscBLASInt       nprow,npcol,myrow,mycol;
1662   PetscInt           optv1,k=2,array[2]={0,0};
1663   PetscMPIInt        size;
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1667   A->insertmode = NOT_SET_VALUES;
1668 
1669   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1670   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1671   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1672   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1673   A->stash.ScatterDestroy = NULL;
1674 
1675   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1676   A->data = (void*)a;
1677 
1678   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1679   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1680     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1681   }
1682   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1683   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1684   if (!flg) {
1685     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1686 
1687     ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1688     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1689 
1690     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1691     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1692     if (flg1) {
1693       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1694       grid->nprow = optv1;
1695     }
1696     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1697 
1698     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1699     grid->npcol = size/grid->nprow;
1700     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1701     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1702     Cblacs_get(-1,0,&grid->ictxt);
1703     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1704     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1705     grid->grid_refct = 1;
1706     grid->nprow      = nprow;
1707     grid->npcol      = npcol;
1708     grid->myrow      = myrow;
1709     grid->mycol      = mycol;
1710     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1711     Cblacs_get(-1,0,&grid->ictxrow);
1712     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1713     Cblacs_get(-1,0,&grid->ictxcol);
1714     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1715     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1716 
1717   } else grid->grid_refct++;
1718   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1719   a->grid = grid;
1720   a->mb   = DEFAULT_BLOCKSIZE;
1721   a->nb   = DEFAULT_BLOCKSIZE;
1722 
1723   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1724   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1725   if (flg) {
1726     a->mb = array[0];
1727     a->nb = (k>1)? array[1]: a->mb;
1728   }
1729   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1730 
1731   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1732   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1733   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1734   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 /*@C
1739    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1740    (2D block cyclic distribution).
1741 
1742    Collective
1743 
1744    Input Parameters:
1745 +  comm - MPI communicator
1746 .  mb   - row block size (or PETSC_DECIDE to have it set)
1747 .  nb   - column block size (or PETSC_DECIDE to have it set)
1748 .  M    - number of global rows
1749 .  N    - number of global columns
1750 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1751 -  csrc - coordinate of process that owns the first column of the distributed matrix
1752 
1753    Output Parameter:
1754 .  A - the matrix
1755 
1756    Options Database Keys:
1757 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1758 
1759    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1760    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1761    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1762 
1763    Notes:
1764    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1765    is chosen.
1766 
1767    Storage Information:
1768    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1769    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1770    significance and are thus ignored. The block sizes refer to the values
1771    used for the distributed matrix, not the same meaning as in BAIJ.
1772 
1773    Level: intermediate
1774 
1775 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1776 @*/
1777 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1778 {
1779   PetscErrorCode ierr;
1780   Mat_ScaLAPACK  *a;
1781   PetscInt       m,n;
1782 
1783   PetscFunctionBegin;
1784   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1785   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1786   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1787   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1788   m = PETSC_DECIDE;
1789   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1790   n = PETSC_DECIDE;
1791   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1792   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1793   a = (Mat_ScaLAPACK*)(*A)->data;
1794   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1795   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1796   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1797   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1798   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1799   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1800   ierr = MatSetUp(*A);CHKERRQ(ierr);
1801   PetscFunctionReturn(0);
1802 }
1803 
1804