xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision b458e8f169278db94fa1d489e1d3db410fc8a4c8)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid.
  MPI_KEYVAL_INVALID is the sentinel meaning the keyval has not been created yet;
  presumably it is created lazily the first time a grid is attached — confirm in
  the grid setup code (outside this view).
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12 {
13   PetscErrorCode    ierr;
14   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
15   PetscBool         iascii;
16   PetscViewerFormat format;
17   Mat               Adense;
18 
19   PetscFunctionBegin;
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21   if (iascii) {
22     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28       PetscFunctionReturn(0);
29     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30       PetscFunctionReturn(0);
31     }
32   }
33   /* convert to dense format and call MatView() */
34   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37   PetscFunctionReturn(0);
38 }
39 
40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41 {
42   PetscErrorCode ierr;
43   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
44   PetscLogDouble isend[2],irecv[2];
45 
46   PetscFunctionBegin;
47   info->block_size = 1.0;
48 
49   isend[0] = a->lld*a->locc;     /* locally allocated */
50   isend[1] = a->locr*a->locc;    /* used submatrix */
51   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52     info->nz_allocated   = isend[0];
53     info->nz_used        = isend[1];
54   } else if (flag == MAT_GLOBAL_MAX) {
55     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56     info->nz_allocated   = irecv[0];
57     info->nz_used        = irecv[1];
58   } else if (flag == MAT_GLOBAL_SUM) {
59     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60     info->nz_allocated   = irecv[0];
61     info->nz_used        = irecv[1];
62   }
63 
64   info->nz_unneeded       = 0;
65   info->assemblies        = A->num_ass;
66   info->mallocs           = 0;
67   info->memory            = ((PetscObject)A)->mem;
68   info->fill_ratio_given  = 0;
69   info->fill_ratio_needed = 0;
70   info->factor_mallocs    = 0;
71   PetscFunctionReturn(0);
72 }
73 
74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75 {
76   PetscFunctionBegin;
77   switch (op) {
78     case MAT_NEW_NONZERO_LOCATIONS:
79     case MAT_NEW_NONZERO_LOCATION_ERR:
80     case MAT_NEW_NONZERO_ALLOCATION_ERR:
81     case MAT_SYMMETRIC:
82     case MAT_SORTED_FULL:
83     case MAT_HERMITIAN:
84       break;
85     default:
86       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87   }
88   PetscFunctionReturn(0);
89 }
90 
/*
   Insert or add a logically dense block of values into A.

   For each (row,col) pair, SCALAPACKinfog2l() maps the 1-based global indices
   to local indices plus the coordinates of the owning process.  Locally owned
   entries are written straight into the column-major local array; off-process
   entries are queued in the matrix stash and exchanged later during assembly.
   Negative row/column indices are skipped, as in other Mat implementations.
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned entry: store into the local array (lld = local leading dimension) */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it, to be communicated in MatAssemblyBegin/End */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
121 
/*
   Shared kernel for MatMult/MatMultTranspose/MatMultAdd/MatMultTransposeAdd:
   computes y = op(A)*x + beta*y, with op(A) = A or A^T and alpha fixed at 1.

   x and y are raw arrays of PETSc vectors in the usual 1d distribution.  Each
   is described to ScaLAPACK with a 1d descriptor (a column-of-a-2d-matrix on
   the grid's column context, or a row on the row context), redistributed with
   pxgemr2d into scratch arrays with the matrix's 2d block-cyclic layout, the
   product is computed with pxgemv, and y is redistributed back.  When beta is
   zero, y is write-only, so its initial redistribution is skipped.

   NOTE(review): ranges[1] is used as the 1d block size, which assumes the
   standard PETSc layout where every process except possibly the last has the
   same local size — confirm callers never supply irregular layouts.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when its old values contribute) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine: y2d = A^T*x2d + beta*y2d */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when its old values contribute) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine: y2d = A*x2d + beta*y2d */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
211 
212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213 {
214   PetscErrorCode    ierr;
215   const PetscScalar *xarray;
216   PetscScalar       *yarray;
217 
218   PetscFunctionBegin;
219   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   PetscErrorCode    ierr;
230   const PetscScalar *xarray;
231   PetscScalar       *yarray;
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243 {
244   PetscErrorCode    ierr;
245   const PetscScalar *xarray;
246   PetscScalar       *zarray;
247 
248   PetscFunctionBegin;
249   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259 {
260   PetscErrorCode    ierr;
261   const PetscScalar *xarray;
262   PetscScalar       *zarray;
263 
264   PetscFunctionBegin;
265   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
/*
   Numeric phase of C = A*B: a single pxgemm call ("N","N") on the
   block-cyclic data.  All three matrices are assumed to live on the same
   process grid (established by the symbolic phase).
*/
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;  /* C is fully defined, no assembly phase needed */
  PetscFunctionReturn(0);
}
287 
/*
   Symbolic phase of C = A*B: size and set up C as a ScaLAPACK matrix and
   install the numeric phase.  The fill argument is ignored (dense storage).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
299 
/*
   Numeric phase of C = A*B^T: a single pxgemm call with transb="T".
   Note this is a plain transpose even for complex scalars (MATPRODUCT_ABt),
   not a Hermitian transpose.
*/
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;  /* C is fully defined, no assembly phase needed */
  PetscFunctionReturn(0);
}
313 
314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315 {
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321   ierr = MatSetUp(C);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
/* Install the hooks for the C = A*B product (symbolic entry points only;
   the numeric phase is installed by the symbolic phase itself) */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
333 
/* Install the hooks for the C = A*B^T product */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
341 
/*
   Dispatch MatProduct configuration for ScaLAPACK operands.
   Only AB and ABt products are implemented; anything else errors out.
*/
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
    case MATPRODUCT_AB:
      ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
      break;
    case MATPRODUCT_ABt:
      ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
      break;
    default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
359 /* --------------------------------------- */
360 
/*
   Extract the diagonal of A into the vector D.

   The diagonal entries are gathered one by one with pxelget into a scratch
   vector laid out on the 2d process grid — as a column of a 2d matrix when
   the matrix has at least as many columns as rows, otherwise as a row — and
   then redistributed with pxgemr2d into the 1d layout of D.  The scratch
   vector is zero-initialized (PetscCalloc1) because pxelset only touches the
   locally owned entry.
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: read A(j,j), store into the j-th entry of the column vector */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: read A(j,j), store into the j-th entry of the row vector */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
433 
/*
   Compute A = diag(L)*A*diag(R); either scaling vector may be NULL.

   Each scaling vector is redistributed with pxgemr2d from the 1d PETSc
   layout to the 2d process grid (R as a row living on the first process
   row, L as a column living on the first process column), broadcast with
   BLACS along the grid dimension it does not span, and finally applied
   entry by entry to the column-major local array.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, everyone else receives */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: column j of the local block is scaled by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, everyone else receives */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local block is scaled by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
507 
/*
   A dense ScaLAPACK matrix stores every entry explicitly, so a diagonal
   entry can never be missing; the optional index output d is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
514 
/*
   X = a*X, done with a single BLAS scal over the whole local array.
   The count lld*locc includes the leading-dimension padding beyond the
   locr used rows; scaling the padding is harmless and keeps this one call.
*/
static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  PetscFunctionBegin;
  n = x->lld*x->locc;
  PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  PetscFunctionReturn(0);
}
525 
526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527 {
528   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529   PetscBLASInt  i,n;
530   PetscScalar   v;
531 
532   PetscFunctionBegin;
533   n = PetscMin(x->M,x->N);
534   for (i=1;i<=n;i++) {
535     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536     v += alpha;
537     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538   }
539   PetscFunctionReturn(0);
540 }
541 
/*
   Y = Y + alpha*X, computed with the ScaLAPACK matrix-add routine.
   Both matrices must share the same block-cyclic distribution, which
   MatScaLAPACKCheckDistribution() enforces; the str argument is ignored
   (dense storage has a fixed pattern).
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
556 
557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558 {
559   PetscErrorCode ierr;
560   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
562 
563   PetscFunctionBegin;
564   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570 {
571   Mat            Bs;
572   MPI_Comm       comm;
573   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581   b = (Mat_ScaLAPACK*)Bs->data;
582   b->M    = a->M;
583   b->N    = a->N;
584   b->mb   = a->mb;
585   b->nb   = a->nb;
586   b->rsrc = a->rsrc;
587   b->csrc = a->csrc;
588   ierr = MatSetUp(Bs);CHKERRQ(ierr);
589   *B = Bs;
590   if (op == MAT_COPY_VALUES) {
591     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592   }
593   Bs->assembled = PETSC_TRUE;
594   PetscFunctionReturn(0);
595 }
596 
/*
   B = A^T.  Only MAT_INITIAL_MATRIX is supported: a new ScaLAPACK matrix
   with swapped sizes is created and filled with pxtran.  In complex builds
   the underlying routine conjugates while transposing (see the fixup loop
   below that undoes it), yielding a plain transpose.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
625 
626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627 {
628   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629   PetscInt      i,j;
630 
631   PetscFunctionBegin;
632   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633   PetscFunctionReturn(0);
634 }
635 
/*
   B = A^H.  Only MAT_INITIAL_MATRIX is supported.  The same pxtran call as
   MatTranspose_ScaLAPACK() is used, but without the conjugation fixup:
   presumably PBLAStran_ maps to the conjugate-transpose routine in complex
   builds (MatTranspose_ScaLAPACK explicitly undoes a conjugation), making
   the raw result already Hermitian — confirm against petscscalapack.h.
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
657 
/*
   Solve A*X = B using the LU or Cholesky factors previously stored in A.

   B is copied into X, X's array is redistributed with pxgemr2d from the 1d
   vector layout to a column of a 2d block-cyclic matrix, solved in place
   with pxgetrs/pxpotrs, and redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching how A was factored */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
710 
/* X = A^{-1}*B + Y: plain solve followed by an axpy with Y */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
  ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
720 
/*
   Solve A*X = B with multiple right-hand sides, where B and X are also
   MATSCALAPACK matrices with matching distributions.  B's local array is
   copied into X, then pxgetrs/pxpotrs solve in place using the factors
   stored in A.
*/
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
  if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  /* identical distribution (checked above), so the local arrays can be copied directly */
  ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
751 
752 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
753 {
754   PetscErrorCode ierr;
755   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
756   PetscBLASInt   one=1,info;
757 
758   PetscFunctionBegin;
759   if (!a->pivots) {
760     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
761     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
762   }
763   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
764   PetscCheckScaLapackInfo("getrf",info);
765   A->factortype = MAT_FACTOR_LU;
766   A->assembled  = PETSC_TRUE;
767 
768   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
769   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
770   PetscFunctionReturn(0);
771 }
772 
773 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
774 {
775   PetscErrorCode ierr;
776 
777   PetscFunctionBegin;
778   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
779   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* Intentionally a no-op: dense ScaLAPACK factorizations need no symbolic phase.
     F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
789 
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  /* In-place Cholesky factorization of the lower triangle via p*potrf;
     perm is ignored (no permutation is applied for dense Cholesky) */
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record the solver package that produced this factorization */
  ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
806 
807 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
808 {
809   PetscErrorCode ierr;
810 
811   PetscFunctionBegin;
812   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
813   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* Intentionally a no-op: dense ScaLAPACK factorizations need no symbolic phase.
     F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
823 
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  /* Query callback composed on factor matrices: always reports MATSOLVERSCALAPACK */
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
830 
831 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
832 {
833   Mat            B;
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   /* Create the factorization matrix */
838   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
839   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
840   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
841   ierr = MatSetUp(B);CHKERRQ(ierr);
842   B->factortype = ftype;
843   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
844   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
845 
846   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
847   *F = B;
848   PetscFunctionReturn(0);
849 }
850 
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Register ScaLAPACK as the LU and Cholesky solver package for MATSCALAPACK matrices */
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
860 
static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,lwork=0;
  const char     *ntype;
  PetscScalar    *work=NULL,dummy;

  PetscFunctionBegin;
  /* Map the requested norm to the character code expected by p*lange and decide
     whether a workspace array is needed (only the 1- and infinity-norms use one) */
  switch (type){
    case NORM_1:
      ntype = "1";
      lwork = PetscMax(a->locr,a->locc);
      break;
    case NORM_FROBENIUS:
      ntype = "F";
      work  = &dummy;  /* presumably not referenced for "F", but a valid address is passed — confirm against p*lange docs */
      break;
    case NORM_INFINITY:
      ntype = "I";
      lwork = PetscMax(a->locr,a->locc);
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
  }
  if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
  *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
  if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
891 
892 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
893 {
894   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
903 {
904   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
905   PetscErrorCode ierr;
906   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
907 
908   PetscFunctionBegin;
909   if (rows) {
910     n     = a->locr;
911     nb    = a->mb;
912     isrc  = a->rsrc;
913     nproc = a->grid->nprow;
914     iproc = a->grid->myrow;
915     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
916     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
917     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
918   }
919   if (cols) {
920     n     = a->locc;
921     nb    = a->nb;
922     isrc  = a->csrc;
923     nproc = a->grid->npcol;
924     iproc = a->grid->mycol;
925     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
926     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
927     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
928   }
929   PetscFunctionReturn(0);
930 }
931 
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Convert a 2d block-cyclic MATSCALAPACK matrix to MATDENSE (1d row-block
     distribution).  The actual data movement is done by ScaLAPACK's p*gemr2d
     redistribution routine, which requires a descriptor for both layouts. */
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* gemr2d needs equal local row counts on all processes (except the last), so
       redistribute into an auxiliary dense matrix with that layout first, then copy
       row-by-row into the differently-distributed target */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1023 
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Convert a MATDENSE matrix (1d row-block distribution) to MATSCALAPACK
     (2d block-cyclic distribution) via ScaLAPACK's p*gemr2d redistribution. */
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix from the 1d layout described by adesc into B's 2d layout */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1072 
1073 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1074 {
1075   Mat               mat_scal;
1076   PetscErrorCode    ierr;
1077   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1078   const PetscInt    *cols;
1079   const PetscScalar *vals;
1080 
1081   PetscFunctionBegin;
1082   if (reuse == MAT_REUSE_MATRIX) {
1083     mat_scal = *newmat;
1084     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1085   } else {
1086     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1087     m = PETSC_DECIDE;
1088     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1089     n = PETSC_DECIDE;
1090     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1091     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1092     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1093     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1094   }
1095   for (row=rstart;row<rend;row++) {
1096     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1097     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1098     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1099   }
1100   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1101   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1102 
1103   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1104   else *newmat = mat_scal;
1105   PetscFunctionReturn(0);
1106 }
1107 
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  /* Convert a symmetric SBAIJ matrix (only the upper triangle is stored) to a
     full MATSCALAPACK matrix by explicitly generating the lower triangle. */
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ADD_VALUES so that the mirrored entries inserted below combine correctly */
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      /* the diagonal is skipped so it is inserted only once */
      if (cols[j] == row) continue;
      /* mirror entry (row,col) to (col,row); conjugate when the matrix is Hermitian */
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1150 
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       sz=0;

  PetscFunctionBegin;
  /* Allocate (zero-initialized) the local dense array of the block-cyclic
     matrix: lld*locc entries, lld being the local leading dimension */
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!a->lld) a->lld = a->locr;  /* default leading dimension is the local row count */

  ierr = PetscFree(a->loc);CHKERRQ(ierr);  /* discard any previous allocation */
  ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);  /* overflow-checked product */
  ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1170 
1171 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1172 {
1173   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1174   PetscErrorCode     ierr;
1175   Mat_ScaLAPACK_Grid *grid;
1176   PetscBool          flg;
1177   MPI_Comm           icomm;
1178 
1179   PetscFunctionBegin;
1180   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1181   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1182   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1183   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1184   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1185   if (--grid->grid_refct == 0) {
1186     Cblacs_gridexit(grid->ictxt);
1187     Cblacs_gridexit(grid->ictxrow);
1188     Cblacs_gridexit(grid->ictxcol);
1189     ierr = PetscFree(grid);CHKERRQ(ierr);
1190     ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1191   }
1192   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1193   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscFree(A->data);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  /* Verify the layout matches what MatCreateScaLAPACK enforces: all processes
     have the same local size, except possibly trailing ones */
  ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
  if (size>2) {
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];  /* local size of the first process */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* error only if the first mismatching chunk and its successor are both nonempty */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1218 
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes: numroc returns the number of rows/columns of the
     block-cyclic distribution stored on this process */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* leading dimension must be at least 1 even with no local rows */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1248 
1249 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1250 {
1251   PetscErrorCode ierr;
1252   PetscInt       nstash,reallocs;
1253 
1254   PetscFunctionBegin;
1255   if (A->nooffprocentries) PetscFunctionReturn(0);
1256   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1257   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1258   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1263 {
1264   PetscErrorCode ierr;
1265   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1266   PetscMPIInt    n;
1267   PetscInt       i,flg,*row,*col;
1268   PetscScalar    *val;
1269   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1270 
1271   PetscFunctionBegin;
1272   if (A->nooffprocentries) PetscFunctionReturn(0);
1273   while (1) {
1274     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1275     if (!flg) break;
1276     for (i=0;i<n;i++) {
1277       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1278       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1279       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1280       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1281       switch (A->insertmode) {
1282         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1283         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1284         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1285       }
1286     }
1287   }
1288   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1293 {
1294   PetscErrorCode ierr;
1295   Mat            Adense,As;
1296   MPI_Comm       comm;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1300   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1301   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1302   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1303   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1304   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1305   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /* -------------------------------------------------------------------*/
1310 static struct _MatOps MatOps_Values = {
1311        MatSetValues_ScaLAPACK,
1312        0,
1313        0,
1314        MatMult_ScaLAPACK,
1315 /* 4*/ MatMultAdd_ScaLAPACK,
1316        MatMultTranspose_ScaLAPACK,
1317        MatMultTransposeAdd_ScaLAPACK,
1318        MatSolve_ScaLAPACK,
1319        MatSolveAdd_ScaLAPACK,
1320        0,
1321 /*10*/ 0,
1322        MatLUFactor_ScaLAPACK,
1323        MatCholeskyFactor_ScaLAPACK,
1324        0,
1325        MatTranspose_ScaLAPACK,
1326 /*15*/ MatGetInfo_ScaLAPACK,
1327        0,
1328        MatGetDiagonal_ScaLAPACK,
1329        MatDiagonalScale_ScaLAPACK,
1330        MatNorm_ScaLAPACK,
1331 /*20*/ MatAssemblyBegin_ScaLAPACK,
1332        MatAssemblyEnd_ScaLAPACK,
1333        MatSetOption_ScaLAPACK,
1334        MatZeroEntries_ScaLAPACK,
1335 /*24*/ 0,
1336        MatLUFactorSymbolic_ScaLAPACK,
1337        MatLUFactorNumeric_ScaLAPACK,
1338        MatCholeskyFactorSymbolic_ScaLAPACK,
1339        MatCholeskyFactorNumeric_ScaLAPACK,
1340 /*29*/ MatSetUp_ScaLAPACK,
1341        0,
1342        0,
1343        0,
1344        0,
1345 /*34*/ MatDuplicate_ScaLAPACK,
1346        0,
1347        0,
1348        0,
1349        0,
1350 /*39*/ MatAXPY_ScaLAPACK,
1351        0,
1352        0,
1353        0,
1354        MatCopy_ScaLAPACK,
1355 /*44*/ 0,
1356        MatScale_ScaLAPACK,
1357        MatShift_ScaLAPACK,
1358        0,
1359        0,
1360 /*49*/ 0,
1361        0,
1362        0,
1363        0,
1364        0,
1365 /*54*/ 0,
1366        0,
1367        0,
1368        0,
1369        0,
1370 /*59*/ 0,
1371        MatDestroy_ScaLAPACK,
1372        MatView_ScaLAPACK,
1373        0,
1374        0,
1375 /*64*/ 0,
1376        0,
1377        0,
1378        0,
1379        0,
1380 /*69*/ 0,
1381        0,
1382        MatConvert_ScaLAPACK_Dense,
1383        0,
1384        0,
1385 /*74*/ 0,
1386        0,
1387        0,
1388        0,
1389        0,
1390 /*79*/ 0,
1391        0,
1392        0,
1393        0,
1394        MatLoad_ScaLAPACK,
1395 /*84*/ 0,
1396        0,
1397        0,
1398        0,
1399        0,
1400 /*89*/ 0,
1401        0,
1402        MatMatMultNumeric_ScaLAPACK,
1403        0,
1404        0,
1405 /*94*/ 0,
1406        0,
1407        0,
1408        MatMatTransposeMultNumeric_ScaLAPACK,
1409        0,
1410 /*99*/ MatProductSetFromOptions_ScaLAPACK,
1411        0,
1412        0,
1413        MatConjugate_ScaLAPACK,
1414        0,
1415 /*104*/0,
1416        0,
1417        0,
1418        0,
1419        0,
1420 /*109*/MatMatSolve_ScaLAPACK,
1421        0,
1422        0,
1423        0,
1424        MatMissingDiagonal_ScaLAPACK,
1425 /*114*/0,
1426        0,
1427        0,
1428        0,
1429        0,
1430 /*119*/0,
1431        MatHermitianTranspose_ScaLAPACK,
1432        0,
1433        0,
1434        0,
1435 /*124*/0,
1436        0,
1437        0,
1438        0,
1439        0,
1440 /*129*/0,
1441        0,
1442        0,
1443        0,
1444        0,
1445 /*134*/0,
1446        0,
1447        0,
1448        0,
1449        0,
1450        0,
1451 /*140*/0,
1452        0,
1453        0,
1454        0,
1455        0,
1456 /*145*/0,
1457        0,
1458        0
1459 };
1460 
1461 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1462 {
1463   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1464   PetscInt           size=stash->size,nsends;
1465   PetscErrorCode     ierr;
1466   PetscInt           count,*sindices,**rindices,i,j,l;
1467   PetscScalar        **rvalues,*svalues;
1468   MPI_Comm           comm = stash->comm;
1469   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1470   PetscMPIInt        *sizes,*nlengths,nreceives;
1471   PetscInt           *sp_idx,*sp_idy;
1472   PetscScalar        *sp_val;
1473   PetscMatStashSpace space,space_next;
1474   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1475   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1476 
1477   PetscFunctionBegin;
1478   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1479     InsertMode addv;
1480     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1481     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1482     mat->insertmode = addv; /* in case this processor had no cache */
1483   }
1484 
1485   bs2 = stash->bs*stash->bs;
1486 
1487   /*  first count number of contributors to each processor */
1488   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1489   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1490 
1491   i     = j    = 0;
1492   space = stash->space_head;
1493   while (space) {
1494     space_next = space->next;
1495     for (l=0; l<space->local_used; l++) {
1496       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1497       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1498       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1499       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1500       nlengths[j]++; owner[i] = j;
1501       i++;
1502     }
1503     space = space_next;
1504   }
1505 
1506   /* Now check what procs get messages - and compute nsends. */
1507   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1508   for (i=0, nsends=0; i<size; i++) {
1509     if (nlengths[i]) {
1510       sizes[i] = 1; nsends++;
1511     }
1512   }
1513 
1514   {PetscMPIInt *onodes,*olengths;
1515    /* Determine the number of messages to expect, their lengths, from from-ids */
1516    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1517    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1518    /* since clubbing row,col - lengths are multiplied by 2 */
1519    for (i=0; i<nreceives; i++) olengths[i] *=2;
1520    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1521    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1522    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1523    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1524    ierr = PetscFree(onodes);CHKERRQ(ierr);
1525    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1526 
1527   /* do sends:
1528       1) starts[i] gives the starting index in svalues for stuff going to
1529          the ith processor
1530   */
1531   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1532   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1533   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1534   /* use 2 sends the first with all_a, the next with all_i and all_j */
1535   startv[0] = 0; starti[0] = 0;
1536   for (i=1; i<size; i++) {
1537     startv[i] = startv[i-1] + nlengths[i-1];
1538     starti[i] = starti[i-1] + 2*nlengths[i-1];
1539   }
1540 
1541   i     = 0;
1542   space = stash->space_head;
1543   while (space) {
1544     space_next = space->next;
1545     sp_idx     = space->idx;
1546     sp_idy     = space->idy;
1547     sp_val     = space->val;
1548     for (l=0; l<space->local_used; l++) {
1549       j = owner[i];
1550       if (bs2 == 1) {
1551         svalues[startv[j]] = sp_val[l];
1552       } else {
1553         PetscInt    k;
1554         PetscScalar *buf1,*buf2;
1555         buf1 = svalues+bs2*startv[j];
1556         buf2 = space->val + bs2*l;
1557         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1558       }
1559       sindices[starti[j]]             = sp_idx[l];
1560       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1561       startv[j]++;
1562       starti[j]++;
1563       i++;
1564     }
1565     space = space_next;
1566   }
1567   startv[0] = 0;
1568   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1569 
1570   for (i=0,count=0; i<size; i++) {
1571     if (sizes[i]) {
1572       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1573       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1574     }
1575   }
1576 #if defined(PETSC_USE_INFO)
1577   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1578   for (i=0; i<size; i++) {
1579     if (sizes[i]) {
1580       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1581     }
1582   }
1583 #endif
1584   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1585   ierr = PetscFree(owner);CHKERRQ(ierr);
1586   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1587   ierr = PetscFree(sizes);CHKERRQ(ierr);
1588 
1589   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1590   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1591 
1592   for (i=0; i<nreceives; i++) {
1593     recv_waits[2*i]   = recv_waits1[i];
1594     recv_waits[2*i+1] = recv_waits2[i];
1595   }
1596   stash->recv_waits = recv_waits;
1597 
1598   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1599   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1600 
1601   stash->svalues         = svalues;
1602   stash->sindices        = sindices;
1603   stash->rvalues         = rvalues;
1604   stash->rindices        = rindices;
1605   stash->send_waits      = send_waits;
1606   stash->nsends          = nsends;
1607   stash->nrecvs          = nreceives;
1608   stash->reproduce_count = 0;
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1613 {
1614   PetscErrorCode ierr;
1615   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1616 
1617   PetscFunctionBegin;
1618   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1619   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1620   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1621   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1622   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1623   PetscFunctionReturn(0);
1624 }
1625 
/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameters:
+  A  - a MATSCALAPACK matrix
.  mb - the row block size
-  nb - the column block size

   Note:
   The block sizes cannot be changed once the matrix has been set up (preallocated).

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the type-specific implementation; a no-op for non-ScaLAPACK matrices */
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1652 
1653 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1654 {
1655   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1656 
1657   PetscFunctionBegin;
1658   if (mb) *mb = a->mb;
1659   if (nb) *nb = a->nb;
1660   PetscFunctionReturn(0);
1661 }
1662 
/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix

   Not collective

   Input Parameter:
.  A  - a MATSCALAPACK matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if A is not a MATSCALAPACK matrix */
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1689 
1690 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1691 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1692 
1693 /*MC
1694    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1695 
1696    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1697 
1698    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1699 
1700    Options Database Keys:
1701 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1702 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1703 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1704 
1705    Level: beginner
1706 
1707 .seealso: MATDENSE, MATELEMENTAL
1708 M*/
1709 
/* Constructor for MATSCALAPACK: installs the function table, attaches (or reuses) the
   BLACS process grid shared by all ScaLAPACK matrices on the same communicator, and
   sets the default block sizes for the 2D block-cyclic distribution. */
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  /* stash with block size 1; the ScaLAPACK-specific ScatterBegin routes stashed
     entries to the process that owns them in the block-cyclic layout */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
  }
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
  if (!flg) {
    /* first ScaLAPACK matrix on this communicator: create the grid and cache it */
    ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);

    ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
    /* default grid height ~ sqrt(size); +0.001 guards against floating-point sqrt
       landing just below an exact integer root before truncation */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      /* NOTE(review): optv1 <= 0 is not rejected here and size % 0 is undefined — confirm the option value is validated upstream */
      if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
    /* create the main nprow x npcol BLACS context ("R" = row-major process ordering),
       then read back the actual grid shape and this process's coordinates */
    Cblacs_get(-1,0,&grid->ictxt);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    Cblacs_get(-1,0,&grid->ictxrow);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    Cblacs_get(-1,0,&grid->ictxcol);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);

  } else grid->grid_refct++;  /* grid already cached: just bump the reference count */
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
  if (flg) {
    /* NOTE(review): unlike MatScaLAPACKSetBlockSizes(), values from the option are
       not checked for positivity — confirm this is intended */
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;  /* a single value is used for both dimensions */
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1793 
1794 /*@C
1795    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1796    (2D block cyclic distribution).
1797 
1798    Collective
1799 
1800    Input Parameters:
1801 +  comm - MPI communicator
1802 .  mb   - row block size (or PETSC_DECIDE to have it set)
1803 .  nb   - column block size (or PETSC_DECIDE to have it set)
1804 .  M    - number of global rows
1805 .  N    - number of global columns
1806 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1807 -  csrc - coordinate of process that owns the first column of the distributed matrix
1808 
1809    Output Parameter:
1810 .  A - the matrix
1811 
1812    Options Database Keys:
1813 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1814 
1815    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1816    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1817    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1818 
1819    Notes:
1820    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1821    is chosen.
1822 
1823    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1825    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1826    significance and are thus ignored. The block sizes refer to the values
1827    used for the distributed matrix, not the same meaning as in BAIJ.
1828 
1829    Level: intermediate
1830 
1831 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1832 @*/
1833 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1834 {
1835   PetscErrorCode ierr;
1836   Mat_ScaLAPACK  *a;
1837   PetscInt       m,n;
1838 
1839   PetscFunctionBegin;
1840   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1841   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1842   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1843   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1844   m = PETSC_DECIDE;
1845   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1846   n = PETSC_DECIDE;
1847   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1848   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1849   a = (Mat_ScaLAPACK*)(*A)->data;
1850   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1851   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1852   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1853   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1854   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1855   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1856   ierr = MatSetUp(*A);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860