xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision e853fb4c8b5febe3904ee8ab6dd9d8f36768265c)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4 "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5 "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6 "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7 "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8 "       PUBLISHER = {SIAM},\n"
9 "       ADDRESS = {Philadelphia, PA},\n"
10 "       YEAR = 1997\n"
11 "}\n";
12 static PetscBool ScaLAPACKCite = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/* Attribute-delete helper registered for Petsc_ScaLAPACK_keyval: releases the
   MPI keyval itself at PetscFinalize time (the per-communicator grid attribute
   is freed separately by the keyval's delete callback). */
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCall(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}
29 
/* Viewer for MATSCALAPACK: for ASCII info formats print only distribution
   metadata (block sizes, process grid, local sizes); otherwise convert to
   MATDENSE and delegate to the dense viewer. */
static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary only: never print entries for INFO formats */
      PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
      PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
      PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
      PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing extra to report for factored matrices */
      PetscFunctionReturn(0);
    }
  }
  /* convert to dense format and call MatView() */
  PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
  PetscCall(MatView(Adense,viewer));
  PetscCall(MatDestroy(&Adense));
  PetscFunctionReturn(0);
}
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59 {
60   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61   PetscLogDouble isend[2],irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld*a->locc;     /* locally allocated */
67   isend[1] = a->locr*a->locc;    /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated   = isend[0];
70     info->nz_used        = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
73     info->nz_allocated   = irecv[0];
74     info->nz_used        = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = ((PetscObject)A)->mem;
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97     case MAT_NEW_NONZERO_LOCATIONS:
98     case MAT_NEW_NONZERO_LOCATION_ERR:
99     case MAT_NEW_NONZERO_ALLOCATION_ERR:
100     case MAT_SYMMETRIC:
101     case MAT_SORTED_FULL:
102     case MAT_HERMITIAN:
103       break;
104     case MAT_ROW_ORIENTED:
105       a->roworiented = flg;
106       break;
107     default:
108       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
109   }
110   PetscFunctionReturn(0);
111 }
112 
/* Insert/add a logically dense nr x nc block of values given by global row and
   column indices. Locally owned entries (determined via infog2l against the 2d
   block-cyclic layout) are written directly into the local array; off-process
   entries go to the stash for communication during assembly. Negative indices
   are skipped. */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i]+1,&gridx));  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx));
      /* map global (gridx,gcidx) to local (lridx,lcidx) and owner (rsrc,csrc) */
      PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned: store column-major with leading dimension lld */
        if (roworiented) {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          }
        } else {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break;
          }
        }
      } else {
        /* off-process: stash for AssemblyBegin/End communication */
        PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 
/* Shared kernel for MatMult / MatMultTranspose / the *Add variants:
   computes y = op(A)*x + beta*y with op = A or A^T (PBLAS "N"/"T").
   The PETSc 1d-distributed vector arrays x,y are redistributed (p?gemr2d)
   into vectors matching A's 2d block-cyclic layout, p?gemv is applied, and
   the result is redistributed back. beta==0 skips the initial redistribution
   of y, giving plain multiplication. */
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when beta!=0) */
    if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when beta!=0) */
    if (beta!=0.0) PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  PetscCall(PetscFree2(x2d,y2d));
  PetscFunctionReturn(0);
}
239 
240 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
241 {
242   const PetscScalar *xarray;
243   PetscScalar       *yarray;
244 
245   PetscFunctionBegin;
246   PetscCall(VecGetArrayRead(x,&xarray));
247   PetscCall(VecGetArray(y,&yarray));
248   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
249   PetscCall(VecRestoreArrayRead(x,&xarray));
250   PetscCall(VecRestoreArray(y,&yarray));
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
255 {
256   const PetscScalar *xarray;
257   PetscScalar       *yarray;
258 
259   PetscFunctionBegin;
260   PetscCall(VecGetArrayRead(x,&xarray));
261   PetscCall(VecGetArray(y,&yarray));
262   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
263   PetscCall(VecRestoreArrayRead(x,&xarray));
264   PetscCall(VecRestoreArray(y,&yarray));
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269 {
270   const PetscScalar *xarray;
271   PetscScalar       *zarray;
272 
273   PetscFunctionBegin;
274   if (y != z) PetscCall(VecCopy(y,z));
275   PetscCall(VecGetArrayRead(x,&xarray));
276   PetscCall(VecGetArray(z,&zarray));
277   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
278   PetscCall(VecRestoreArrayRead(x,&xarray));
279   PetscCall(VecRestoreArray(z,&zarray));
280   PetscFunctionReturn(0);
281 }
282 
283 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
284 {
285   const PetscScalar *xarray;
286   PetscScalar       *zarray;
287 
288   PetscFunctionBegin;
289   if (y != z) PetscCall(VecCopy(y,z));
290   PetscCall(VecGetArrayRead(x,&xarray));
291   PetscCall(VecGetArray(z,&zarray));
292   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
293   PetscCall(VecRestoreArrayRead(x,&xarray));
294   PetscCall(VecRestoreArray(z,&zarray));
295   PetscFunctionReturn(0);
296 }
297 
/* Numeric phase of C = A*B via PBLAS p?gemm ("N","N"); all three matrices are
   assumed to share the same process grid and block-cyclic layout. */
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
311 
/* Symbolic phase of C = A*B: size and allocate C as a MATSCALAPACK and
   install the numeric phase; fill is irrelevant for dense storage. */
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
321 
/* Numeric phase of C = A*B^T via PBLAS p?gemm ("N","T"). */
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
335 
336 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
337 {
338   PetscFunctionBegin;
339   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
340   PetscCall(MatSetType(C,MATSCALAPACK));
341   PetscCall(MatSetUp(C));
342   PetscFunctionReturn(0);
343 }
344 
345 /* --------------------------------------- */
/* Configure a MATPRODUCT_AB product: route the symbolic phase through the
   generic MatProductSymbolic_AB driver, which calls matmultsymbolic. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
353 
/* Configure a MATPRODUCT_ABt product: route the symbolic phase through the
   generic MatProductSymbolic_ABt driver, which calls mattransposemultsymbolic. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
361 
362 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
363 {
364   Mat_Product    *product = C->product;
365 
366   PetscFunctionBegin;
367   switch (product->type) {
368     case MATPRODUCT_AB:
369       PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
370       break;
371     case MATPRODUCT_ABt:
372       PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
373       break;
374     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
375   }
376   PetscFunctionReturn(0);
377 }
378 /* --------------------------------------- */
379 
/* Extract the diagonal of A into the 1d-distributed vector D. The diagonal is
   first gathered element-by-element (elget/elset) into a temporary vector laid
   out on A's 2d grid, then redistributed (p?gemr2d) to D's layout. The row
   variant (M<=N) builds it as a column of a 2d matrix, the column variant as a
   row. */
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D,&darray));

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal (1-based global indices) */
    for (j=1;j<=a->M;j++) {
      PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal (1-based global indices) */
    for (j=1;j<=a->N;j++) {
      PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D,&darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
451 
/* A = diag(L)*A*diag(R). Each scaling vector is redistributed from its 1d
   PETSc layout onto a single process row (for R) or column (for L) of the 2d
   grid, broadcast to the other rows/columns with BLACS gebs2d/gebr2d, and then
   applied entry-wise to the local block. Either L or R may be NULL. */
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, others receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: column j of the local block scaled by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, others receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local block scaled by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
524 
/* Dense storage always has every diagonal entry present, so nothing is ever
   missing; the index output d is left untouched. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
531 
/* X = a*X, applied with a single BLAS scal over the whole local allocation
   (lld*locc entries, i.e. including any padding rows up to the leading
   dimension — scaling padding is harmless). */
static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  PetscFunctionBegin;
  n = x->lld*x->locc;
  PetscCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  PetscFunctionReturn(0);
}
542 
/* X = X + alpha*I: add alpha to each of the min(M,N) diagonal entries using
   ScaLAPACK element get/set (1-based global indices). */
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {
    PetscCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}
558 
/* Y = alpha*X + Y via ScaLAPACK p?geadd (matadd with beta=1); requires X and Y
   to share the same block-cyclic distribution, which is verified first. */
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}
572 
/* B = A by a raw copy of the local allocation; assumes B already has the same
   distribution (lld, locc) as A — NOTE(review): not verified here, presumably
   guaranteed by the callers. */
static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}
583 
/* Create a new MATSCALAPACK with the same sizes and distribution parameters
   as A, optionally copying the local values. The block-size/source fields are
   written into the new Mat's data before MatSetUp so allocation matches A. */
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatCreate(comm,&Bs));
  PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(Bs,MATSCALAPACK));
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's layout so the local arrays are congruent */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
610 
/* B = A^T. A new matrix with swapped block sizes/dimensions is created and
   filled via PBLAStran; in complex builds PBLAStran conjugates, so the
   conjugation is undone afterwards to yield a plain transpose.
   NOTE(review): the MAT_REUSE_MATRIX nonzero-state check below is dead code —
   any reuse other than MAT_INITIAL_MATRIX hits the SETERRQ; confirm whether
   reuse support was intended. */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
636 
637 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
638 {
639   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
640   PetscInt      i,j;
641 
642   PetscFunctionBegin;
643   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
644   PetscFunctionReturn(0);
645 }
646 
/* B = A^H. Same construction as MatTranspose_ScaLAPACK, but the conjugation
   performed by PBLAStran in complex builds is kept (not undone), yielding the
   conjugate transpose. */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
664 
/* Solve A*X = B for a single right-hand-side vector using the factorization
   already stored in A (LU via p?getrs or Cholesky via p?potrs). B is copied
   into X, X's array is redistributed onto A's 2d grid, the triangular solves
   run there, and the result is redistributed back. */
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B,X));
  PetscCall(VecGetArray(X,&x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  PetscCall(PetscMalloc1(lszx,&x2d));
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X,&x));
  PetscFunctionReturn(0);
}
716 
/* X = A^{-1}*B + Y: solve first, then accumulate Y into the result. */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A,B,X));
  PetscCall(VecAXPY(X,1,Y));
  PetscFunctionReturn(0);
}
724 
/* Solve A*X = B for matrix right-hand sides. B and X must both be
   MATSCALAPACK with identical distributions; B's local data is copied into X
   and the factored solve (p?getrs / p?potrs) runs in place on X. */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
  PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
754 
755 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
756 {
757   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758   PetscBLASInt   one=1,info;
759 
760   PetscFunctionBegin;
761   if (!a->pivots) {
762     PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots));
763     PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
764   }
765   PetscCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766   PetscCheckScaLapackInfo("getrf",info);
767   A->factortype = MAT_FACTOR_LU;
768   A->assembled  = PETSC_TRUE;
769 
770   PetscCall(PetscFree(A->solvertype));
771   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
772   PetscFunctionReturn(0);
773 }
774 
/*
  MatLUFactorNumeric_ScaLAPACK - Numeric phase of the LU factorization: copies
  the values of A into the factor matrix F and factors F in place.
*/
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info));
  PetscFunctionReturn(0);
}
782 
/*
  MatLUFactorSymbolic_ScaLAPACK - Symbolic LU phase; a no-op for dense storage.
*/
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
789 
/*
  MatCholeskyFactor_ScaLAPACK - In-place Cholesky factorization (p?potrf) using
  the lower triangular part of the matrix.  The permutation and factorinfo
  arguments are ignored.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}
805 
/*
  MatCholeskyFactorNumeric_ScaLAPACK - Numeric phase of the Cholesky
  factorization: copies the values of A into F and factors F in place.
*/
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info));
  PetscFunctionReturn(0);
}
813 
/*
  MatCholeskyFactorSymbolic_ScaLAPACK - Symbolic Cholesky phase; a no-op for
  dense storage.
*/
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
820 
/*
  MatFactorGetSolverType_scalapack_scalapack - Reports the solver package used
  by factors of this type (always MATSOLVERSCALAPACK).
*/
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
827 
/*
  MatGetFactor_scalapack_scalapack - Creates the matrix that will hold an LU or
  Cholesky factorization computed by this package.

  The factor is given the same distribution parameters as A (block sizes,
  global sizes, process-grid origin).  The symbolic phase is trivial, so the
  actual work happens in the numeric factorization routines.
*/
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}
845 
/*
  MatSolverTypeRegister_ScaLAPACK - Registers the LU and Cholesky factorizations
  provided by this package with PETSc's solver-type registry.
*/
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
853 
854 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
855 {
856   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
857   PetscBLASInt   one=1,lwork=0;
858   const char     *ntype;
859   PetscScalar    *work=NULL,dummy;
860 
861   PetscFunctionBegin;
862   switch (type) {
863     case NORM_1:
864       ntype = "1";
865       lwork = PetscMax(a->locr,a->locc);
866       break;
867     case NORM_FROBENIUS:
868       ntype = "F";
869       work  = &dummy;
870       break;
871     case NORM_INFINITY:
872       ntype = "I";
873       lwork = PetscMax(a->locr,a->locc);
874       break;
875     default:
876       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
877   }
878   if (lwork) PetscCall(PetscMalloc1(lwork,&work));
879   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
880   if (lwork) PetscCall(PetscFree(work));
881   PetscFunctionReturn(0);
882 }
883 
/*
  MatZeroEntries_ScaLAPACK - Sets all entries of the matrix to zero.

  The local storage is a column-major lld x locc buffer; the entire buffer,
  including any padding rows beyond locr, is cleared.
*/
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc,a->lld*a->locc));
  PetscFunctionReturn(0);
}
892 
893 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
894 {
895   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
896   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
897 
898   PetscFunctionBegin;
899   if (rows) {
900     n     = a->locr;
901     nb    = a->mb;
902     isrc  = a->rsrc;
903     nproc = a->grid->nprow;
904     iproc = a->grid->myrow;
905     PetscCall(PetscMalloc1(n,&idx));
906     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
907     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
908   }
909   if (cols) {
910     n     = a->locc;
911     nb    = a->nb;
912     isrc  = a->csrc;
913     nproc = a->grid->npcol;
914     iproc = a->grid->mycol;
915     PetscCall(PetscMalloc1(n,&idx));
916     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
917     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
918   }
919   PetscFunctionReturn(0);
920 }
921 
/*
  MatConvert_ScaLAPACK_Dense - Converts a MATSCALAPACK matrix to MATDENSE.

  The dense target is described to ScaLAPACK as a 1d block distribution (one
  block of rows per process) and the data is moved with p?gemr2d on the
  single-column BLACS context.  If MAT_REUSE_MATRIX was requested with a matrix
  whose row layout differs from the equal split used here, the redistribution
  goes through an auxiliary dense matrix whose rows are then copied into *B
  with MatSetValues().
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges));
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));
    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend));
    for (i=rstart;i<rend;i++) {
      PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
      PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
    }
    PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm,&Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
      PetscCall(MatSetSizes(Bmpi,m,n,M,N));
      PetscCall(MatSetType(Bmpi,MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));

    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A,&Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1012 
/*
  MatScaLAPACKCheckLayout - Checks whether a layout is compatible with the one
  enforced by MatCreateScaLAPACK(): every process owns the same number of
  rows/columns, except possibly the last process, which may own fewer.
*/
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm,&size));
  if (size>1) {
    PetscCall(PetscLayoutGetRanges(map,&ranges));
    n = ranges[1]-ranges[0];  /* local size of the first process is the reference */
    for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* acceptable if all local sizes match (i==size), or only the last one differs and is no larger */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}
1030 
/*
  MatConvert_Dense_ScaLAPACK - Converts a MATDENSE matrix to MATSCALAPACK.

  When the dense matrix's row and column layouts are ScaLAPACK-compatible
  (equal local sizes except possibly the last process), it is described to
  ScaLAPACK as a 1d block distribution and redistributed with p?gemr2d;
  otherwise the entries are moved through MatSetValues() using the target's
  ownership index sets.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges,*rows,*cols;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  IS             ir,ic;
  PetscInt       lda;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(MatDenseGetArray(A,&aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
    lld = PetscMax(lda,1);  /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE;  /* values already in place; the assembly below is a no-op */
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")",lda,A->rmap->n);
    b->roworiented = PETSC_FALSE;  /* the dense array is column-major */
    PetscCall(MatGetOwnershipIS(A,&ir,&ic));
    PetscCall(ISGetIndices(ir,&rows));
    PetscCall(ISGetIndices(ic,&cols));
    PetscCall(MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir,&rows));
    PetscCall(ISRestoreIndices(ic,&cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A,&aarray));
  PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1095 
1096 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1097 {
1098   Mat               mat_scal;
1099   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1100   const PetscInt    *cols;
1101   const PetscScalar *vals;
1102 
1103   PetscFunctionBegin;
1104   if (reuse == MAT_REUSE_MATRIX) {
1105     mat_scal = *newmat;
1106     PetscCall(MatZeroEntries(mat_scal));
1107   } else {
1108     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1109     m = PETSC_DECIDE;
1110     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1111     n = PETSC_DECIDE;
1112     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
1113     PetscCall(MatSetSizes(mat_scal,m,n,M,N));
1114     PetscCall(MatSetType(mat_scal,MATSCALAPACK));
1115     PetscCall(MatSetUp(mat_scal));
1116   }
1117   for (row=rstart;row<rend;row++) {
1118     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
1119     PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
1120     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1121   }
1122   PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
1123   PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1124 
1125   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
1126   else *newmat = mat_scal;
1127   PetscFunctionReturn(0);
1128 }
1129 
/*
  MatConvert_SBAIJ_ScaLAPACK - Converts a symmetric sparse (SBAIJ) matrix to
  MATSCALAPACK.

  SBAIJ stores only the upper triangular part, so each stored entry (row,col)
  is inserted twice: once in place, and once mirrored at (col,row) — conjugated
  when the matrix is marked hermitian.  Diagonal entries are inserted only
  once.  ADD_VALUES is used so mirrored contributions from different processes
  combine correctly during assembly.
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    PetscCall(MatSetSizes(mat_scal,m,n,M,N));
    PetscCall(MatSetType(mat_scal,MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row=rstart;row<rend;row++) {
    PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;  /* do not duplicate the diagonal */
      v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1171 
/*
  MatScaLAPACKSetPreallocation - Allocates (zero-initialized) the local buffer
  holding this process's share of the 2d block-cyclic distribution.

  The buffer has lld*locc entries; PetscIntMultError() guards the product
  against integer overflow.  Any previously allocated buffer is freed first.
*/
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       sz=0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;  /* default leading dimension is the local row count */

  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld,a->locc,&sz));
  PetscCall(PetscCalloc1(sz,&a->loc));
  PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1190 
1191 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1192 {
1193   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1194   Mat_ScaLAPACK_Grid *grid;
1195   PetscBool          flg;
1196   MPI_Comm           icomm;
1197 
1198   PetscFunctionBegin;
1199   PetscCall(MatStashDestroy_Private(&A->stash));
1200   PetscCall(PetscFree(a->loc));
1201   PetscCall(PetscFree(a->pivots));
1202   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
1203   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1204   if (--grid->grid_refct == 0) {
1205     Cblacs_gridexit(grid->ictxt);
1206     Cblacs_gridexit(grid->ictxrow);
1207     Cblacs_gridexit(grid->ictxcol);
1208     PetscCall(PetscFree(grid));
1209     PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
1210   }
1211   PetscCall(PetscCommDestroy(&icomm));
1212   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
1213   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1214   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
1215   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
1216   PetscCall(PetscFree(A->data));
1217   PetscFunctionReturn(0);
1218 }
1219 
/*
  MatSetUp_ScaLAPACK - Finalizes the matrix layout, allocates the local buffer,
  and initializes the ScaLAPACK array descriptor.

  The PETSc layouts must match the equal-local-size distribution enforced by
  MatCreateScaLAPACK(); local sizes are then computed with numroc from the
  block-cyclic parameters.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   info=0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
  PetscCheck(flg,A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
  PetscCheck(flg,A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N,&a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N,&a->N));
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* leading dimension must be at least 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1251 
/*
  MatAssemblyBegin_ScaLAPACK - Starts communicating stashed off-process entries.

  Skipped entirely when the caller guaranteed that no off-process entries were
  set (A->nooffprocentries).
*/
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
1263 
/*
  MatAssemblyEnd_ScaLAPACK - Receives stashed entries from other processes and
  stores them in the local block-cyclic array.

  Each received (row,col) pair is converted from 1-based global indices to
  1-based local indices with infog2l; the owning process coordinates returned
  by infog2l must match this process, otherwise the stash routed the value
  incorrectly.
*/
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
    if (!flg) break;  /* no more incoming messages */
    for (i=0;i<n;i++) {
      PetscCall(PetscBLASIntCast(row[i]+1,&gridx));  /* ScaLAPACK indices are 1-based */
      PetscCall(PetscBLASIntCast(col[i]+1,&gcidx));
      PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1292 
1293 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1294 {
1295   Mat            Adense,As;
1296   MPI_Comm       comm;
1297 
1298   PetscFunctionBegin;
1299   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
1300   PetscCall(MatCreate(comm,&Adense));
1301   PetscCall(MatSetType(Adense,MATDENSE));
1302   PetscCall(MatLoad(Adense,viewer));
1303   PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
1304   PetscCall(MatDestroy(&Adense));
1305   PetscCall(MatHeaderReplace(newMat,&As));
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK; slots left as 0 fall back to PETSc's
   defaults or are unsupported.  The index comments give the slot number in
   struct _MatOps. */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0,
       0,
       0,
/*150*/0
};
1463 
/*
  MatStashScatterBegin_ScaLAPACK - Starts sending stashed (off-process) matrix
  entries to their owners under the 2d block-cyclic distribution.

  Unlike the generic stash scatter, the destination process of each entry is
  not determined by row-ownership ranges but by infog2l/Cblacs_pnum on the
  entry's (row,col) coordinates.  Indices and values travel in two separate
  messages per destination (tag1 for the index pairs, tag2 for the values);
  the receives are posted first, then the sends, and completion is handled by
  MatStashScatterGetMesg_Private()/MatStashScatterEnd_Private().
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size,&nlengths));
  PetscCall(PetscMalloc1(stash->n+1,&owner));

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* map the entry's 1-based global (row,col) to the owning grid process */
      PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx));
      PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx));
      PetscCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);  /* linear rank of the owner in the grid */
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size,&sizes));
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
   PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
   PetscCall(PetscFree(onodes));
   PetscCall(PetscFree(olengths));}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
  PetscCall(PetscMalloc1(2*nsends,&send_waits));
  PetscCall(PetscMalloc2(size,&startv,size,&starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack values and (row,col) index pairs into the per-destination segments */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];  /* columns stored nlengths[j] slots after rows */
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* reset startv to the segment beginnings for the sends below */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv,starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2*nreceives,&recv_waits));

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1613 
1614 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615 {
1616   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1617 
1618   PetscFunctionBegin;
1619   PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1620   PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
1621   PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
1622   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1623   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 /*@
1628    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1629    the ScaLAPACK matrix
1630 
1631    Logically Collective on A
1632 
1633    Input Parameters:
1634 +  A  - a MATSCALAPACK matrix
1635 .  mb - the row block size
1636 -  nb - the column block size
1637 
1638    Level: intermediate
1639 
1640 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1641 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* block sizes define the global 2D block-cyclic layout, so all ranks must pass the same values */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the implementation composed as "MatScaLAPACKSetBlockSizes_C" (set for MATSCALAPACK); no-op otherwise */
  PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
  PetscFunctionReturn(0);
}
1651 
1652 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1653 {
1654   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1655 
1656   PetscFunctionBegin;
1657   if (mb) *mb = a->mb;
1658   if (nb) *nb = a->nb;
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 /*@
1663    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1664    the ScaLAPACK matrix
1665 
1666    Not collective
1667 
1668    Input Parameter:
1669 .  A  - a MATSCALAPACK matrix
1670 
1671    Output Parameters:
1672 +  mb - the row block size
1673 -  nb - the column block size
1674 
1675    Level: intermediate
1676 
1677 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1678 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the implementation composed as "MatScaLAPACKGetBlockSizes_C";
     unlike PetscTryMethod, PetscUseMethod raises an error if A's type does not provide it */
  PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
  PetscFunctionReturn(0);
}
1686 
1687 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1688 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1689 
1690 /*MC
1691    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1692 
1693    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1694 
1695    Options Database Keys:
1696 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1697 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1698 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1699 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1700 
1701   Note:
1702    Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1703    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1704    the given rank.
1705 
1706    Level: beginner
1707 
1708 .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1709 M*/
1710 
1711 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1712 {
1713   Mat_ScaLAPACK      *a;
1714   PetscBool          flg,flg1;
1715   Mat_ScaLAPACK_Grid *grid;
1716   MPI_Comm           icomm;
1717   PetscBLASInt       nprow,npcol,myrow,mycol;
1718   PetscInt           optv1,k=2,array[2]={0,0};
1719   PetscMPIInt        size;
1720 
1721   PetscFunctionBegin;
1722   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
1723   A->insertmode = NOT_SET_VALUES;
1724 
1725   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
1726   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1727   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1728   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1729   A->stash.ScatterDestroy = NULL;
1730 
1731   PetscCall(PetscNewLog(A,&a));
1732   A->data = (void*)a;
1733 
1734   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1735   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1736     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
1737     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1738     PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
1739   }
1740   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
1741   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1742   if (!flg) {
1743     PetscCall(PetscNewLog(A,&grid));
1744 
1745     PetscCallMPI(MPI_Comm_size(icomm,&size));
1746     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1747 
1748     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1749     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
1750     if (flg1) {
1751       PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
1752       grid->nprow = optv1;
1753     }
1754     PetscOptionsEnd();
1755 
1756     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1757     grid->npcol = size/grid->nprow;
1758     PetscCall(PetscBLASIntCast(grid->nprow,&nprow));
1759     PetscCall(PetscBLASIntCast(grid->npcol,&npcol));
1760     grid->ictxt = Csys2blacs_handle(icomm);
1761     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1762     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1763     grid->grid_refct = 1;
1764     grid->nprow      = nprow;
1765     grid->npcol      = npcol;
1766     grid->myrow      = myrow;
1767     grid->mycol      = mycol;
1768     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1769     grid->ictxrow = Csys2blacs_handle(icomm);
1770     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1771     grid->ictxcol = Csys2blacs_handle(icomm);
1772     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1773     PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));
1774 
1775   } else grid->grid_refct++;
1776   PetscCall(PetscCommDestroy(&icomm));
1777   a->grid = grid;
1778   a->mb   = DEFAULT_BLOCKSIZE;
1779   a->nb   = DEFAULT_BLOCKSIZE;
1780 
1781   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1782   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
1783   if (flg) {
1784     a->mb = array[0];
1785     a->nb = (k>1)? array[1]: a->mb;
1786   }
1787   PetscOptionsEnd();
1788 
1789   a->roworiented = PETSC_TRUE;
1790   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
1791   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
1792   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
1793   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 /*@C
1798    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1799    (2D block cyclic distribution).
1800 
1801    Collective
1802 
1803    Input Parameters:
1804 +  comm - MPI communicator
1805 .  mb   - row block size (or PETSC_DECIDE to have it set)
1806 .  nb   - column block size (or PETSC_DECIDE to have it set)
1807 .  M    - number of global rows
1808 .  N    - number of global columns
1809 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1810 -  csrc - coordinate of process that owns the first column of the distributed matrix
1811 
1812    Output Parameter:
1813 .  A - the matrix
1814 
1815    Options Database Keys:
1816 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1817 
1818    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1819    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1820    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1821 
1822    Notes:
1823    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1824    is chosen.
1825 
1826    Storage Information:
1827    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1828    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1829    significance and are thus ignored. The block sizes refer to the values
1830    used for the distributed matrix, not the same meaning as in BAIJ.
1831 
1832    Level: intermediate
1833 
1834 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1835 @*/
1836 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1837 {
1838   Mat_ScaLAPACK  *a;
1839   PetscInt       m,n;
1840 
1841   PetscFunctionBegin;
1842   PetscCall(MatCreate(comm,A));
1843   PetscCall(MatSetType(*A,MATSCALAPACK));
1844   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1845   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1846   m = PETSC_DECIDE;
1847   PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
1848   n = PETSC_DECIDE;
1849   PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
1850   PetscCall(MatSetSizes(*A,m,n,M,N));
1851   a = (Mat_ScaLAPACK*)(*A)->data;
1852   PetscCall(PetscBLASIntCast(M,&a->M));
1853   PetscCall(PetscBLASIntCast(N,&a->N));
1854   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1855   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1856   PetscCall(PetscBLASIntCast(rsrc,&a->rsrc));
1857   PetscCall(PetscBLASIntCast(csrc,&a->csrc));
1858   PetscCall(MatSetUp(*A));
1859   PetscFunctionReturn(0);
1860 }
1861