xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision aeccfd6fd46a34231537ae422040d581ee0e5930)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 PetscInt Plapack_nprows,Plapack_npcols;
10 MPI_Comm Plapack_comm_2d;
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatDenseGetLocalMatrix"
14 /*@
15 
16       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
17               matrix that represents the operator. For sequential matrices it returns itself.
18 
19     Input Parameter:
20 .      A - the Seq or MPI dense matrix
21 
22     Output Parameter:
23 .      B - the inner matrix
24 
25     Level: intermediate
26 
27 @*/
28 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
29 {
30   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
31   PetscErrorCode ierr;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
36   if (flg) {
37     *B = mat->A;
38   } else {
39     *B = A;
40   }
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatGetRow_MPIDense"
46 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
47 {
48   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
49   PetscErrorCode ierr;
50   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
51 
52   PetscFunctionBegin;
53   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
54   lrow = row - rstart;
55   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatRestoreRow_MPIDense"
61 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62 {
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
67   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
68   PetscFunctionReturn(0);
69 }
70 
71 EXTERN_C_BEGIN
72 #undef __FUNCT__
73 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
74 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
75 {
76   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
77   PetscErrorCode ierr;
78   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
79   PetscScalar    *array;
80   MPI_Comm       comm;
81 
82   PetscFunctionBegin;
83   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
84 
85   /* The reuse aspect is not implemented efficiently */
86   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
87 
88   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
89   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
90   ierr = MatCreate(comm,B);CHKERRQ(ierr);
91   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
92   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
93   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
94   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
95   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
97 
98   *iscopy = PETSC_TRUE;
99   PetscFunctionReturn(0);
100 }
101 EXTERN_C_END
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatSetValues_MPIDense"
105 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
106 {
107   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
108   PetscErrorCode ierr;
109   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
110   PetscTruth     roworiented = A->roworiented;
111 
112   PetscFunctionBegin;
113   for (i=0; i<m; i++) {
114     if (idxm[i] < 0) continue;
115     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
116     if (idxm[i] >= rstart && idxm[i] < rend) {
117       row = idxm[i] - rstart;
118       if (roworiented) {
119         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
120       } else {
121         for (j=0; j<n; j++) {
122           if (idxn[j] < 0) continue;
123           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
124           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
125         }
126       }
127     } else {
128       if (!A->donotstash) {
129         if (roworiented) {
130           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
131         } else {
132           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
133         }
134       }
135     }
136   }
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatGetValues_MPIDense"
142 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
143 {
144   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
145   PetscErrorCode ierr;
146   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
147 
148   PetscFunctionBegin;
149   for (i=0; i<m; i++) {
150     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
151     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
152     if (idxm[i] >= rstart && idxm[i] < rend) {
153       row = idxm[i] - rstart;
154       for (j=0; j<n; j++) {
155         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
156         if (idxn[j] >= mat->cmap.N) {
157           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
158         }
159         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
160       }
161     } else {
162       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
163     }
164   }
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatGetArray_MPIDense"
170 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
182 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
183 {
184   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
185   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
186   PetscErrorCode ierr;
187   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
188   PetscScalar    *av,*bv,*v = lmat->v;
189   Mat            newmat;
190 
191   PetscFunctionBegin;
192   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
193   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
194   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
195   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
196 
197   /* No parallel redistribution currently supported! Should really check each index set
198      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
199      original matrix! */
200 
201   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
202   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
203 
204   /* Check submatrix call */
205   if (scall == MAT_REUSE_MATRIX) {
206     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
207     /* Really need to test rows and column sizes! */
208     newmat = *B;
209   } else {
210     /* Create and fill new matrix */
211     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
212     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
213     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
214     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
215   }
216 
217   /* Now extract the data pointers and do the copy, column at a time */
218   newmatd = (Mat_MPIDense*)newmat->data;
219   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
220 
221   for (i=0; i<ncols; i++) {
222     av = v + nlrows*icol[i];
223     for (j=0; j<nrows; j++) {
224       *bv++ = av[irow[j] - rstart];
225     }
226   }
227 
228   /* Assemble the matrices so that the correct flags are set */
229   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231 
232   /* Free work space */
233   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
234   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
235   *B = newmat;
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreArray_MPIDense"
241 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
242 {
243   PetscFunctionBegin;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm = ((PetscObject)mat)->comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   /* make sure all processors are either in INSERTMODE or ADDMODE */
259   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
260   if (addv == (ADD_VALUES|INSERT_VALUES)) {
261     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   }
263   mat->insertmode = addv; /* in case this processor had no cache */
264 
265   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
266   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
267   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
274 {
275   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
276   PetscErrorCode  ierr;
277   PetscInt        i,*row,*col,flg,j,rstart,ncols;
278   PetscMPIInt     n;
279   PetscScalar     *val;
280   InsertMode      addv=mat->insertmode;
281 
282   PetscFunctionBegin;
283   /*  wait on receives */
284   while (1) {
285     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
286     if (!flg) break;
287 
288     for (i=0; i<n;) {
289       /* Now identify the consecutive vals belonging to the same row */
290       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
295       i = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* the code does not do the diagonal entries correctly unless the
322    matrix is square and the column and row owerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    mdn->A and mdn->B directly and not through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
330 {
331   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode ierr;
333   PetscInt       i,*owners = A->rmap.range;
334   PetscInt       *nprocs,j,idx,nsends;
335   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt       *lens,*lrows,*values;
338   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm       comm = ((PetscObject)A)->comm;
340   MPI_Request    *send_waits,*recv_waits;
341   MPI_Status     recv_status,*send_status;
342   PetscTruth     found;
343 
344   PetscFunctionBegin;
345   /*  first count number of contributors to each processor */
346   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
347   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
348   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
349   for (i=0; i<N; i++) {
350     idx = rows[i];
351     found = PETSC_FALSE;
352     for (j=0; j<size; j++) {
353       if (idx >= owners[j] && idx < owners[j+1]) {
354         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
355       }
356     }
357     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
358   }
359   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
360 
361   /* inform other processors of number of messages and max length*/
362   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
363 
364   /* post receives:   */
365   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
366   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
367   for (i=0; i<nrecvs; i++) {
368     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
369   }
370 
371   /* do sends:
372       1) starts[i] gives the starting index in svalues for stuff going to
373          the ith processor
374   */
375   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
376   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
377   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
378   starts[0]  = 0;
379   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
380   for (i=0; i<N; i++) {
381     svalues[starts[owner[i]]++] = rows[i];
382   }
383 
384   starts[0] = 0;
385   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
386   count = 0;
387   for (i=0; i<size; i++) {
388     if (nprocs[2*i+1]) {
389       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
390     }
391   }
392   ierr = PetscFree(starts);CHKERRQ(ierr);
393 
394   base = owners[rank];
395 
396   /*  wait on receives */
397   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
398   source = lens + nrecvs;
399   count  = nrecvs; slen = 0;
400   while (count) {
401     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
402     /* unpack receives into our local space */
403     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
404     source[imdex]  = recv_status.MPI_SOURCE;
405     lens[imdex]    = n;
406     slen += n;
407     count--;
408   }
409   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
410 
411   /* move the data into the send scatter */
412   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
413   count = 0;
414   for (i=0; i<nrecvs; i++) {
415     values = rvalues + i*nmax;
416     for (j=0; j<lens[i]; j++) {
417       lrows[count++] = values[j] - base;
418     }
419   }
420   ierr = PetscFree(rvalues);CHKERRQ(ierr);
421   ierr = PetscFree(lens);CHKERRQ(ierr);
422   ierr = PetscFree(owner);CHKERRQ(ierr);
423   ierr = PetscFree(nprocs);CHKERRQ(ierr);
424 
425   /* actually zap the local rows */
426   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
427   ierr = PetscFree(lrows);CHKERRQ(ierr);
428 
429   /* wait on sends */
430   if (nsends) {
431     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
432     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
433     ierr = PetscFree(send_status);CHKERRQ(ierr);
434   }
435   ierr = PetscFree(send_waits);CHKERRQ(ierr);
436   ierr = PetscFree(svalues);CHKERRQ(ierr);
437 
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatMult_MPIDense"
443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
444 {
445   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
446   PetscErrorCode ierr;
447 
448   PetscFunctionBegin;
449   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
450   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
451   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatMultAdd_MPIDense"
457 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
458 {
459   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
460   PetscErrorCode ierr;
461 
462   PetscFunctionBegin;
463   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
464   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
465   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMultTranspose_MPIDense"
471 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
474   PetscErrorCode ierr;
475   PetscScalar    zero = 0.0;
476 
477   PetscFunctionBegin;
478   ierr = VecSet(yy,zero);CHKERRQ(ierr);
479   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
480   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
481   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
487 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
488 {
489   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
490   PetscErrorCode ierr;
491 
492   PetscFunctionBegin;
493   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
494   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
495   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
496   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatGetDiagonal_MPIDense"
502 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
503 {
504   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
505   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
506   PetscErrorCode ierr;
507   PetscInt       len,i,n,m = A->rmap.n,radd;
508   PetscScalar    *x,zero = 0.0;
509 
510   PetscFunctionBegin;
511   ierr = VecSet(v,zero);CHKERRQ(ierr);
512   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
513   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
514   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
515   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
516   radd = A->rmap.rstart*m;
517   for (i=0; i<len; i++) {
518     x[i] = aloc->v[radd + i*m + i];
519   }
520   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatDestroy_MPIDense"
526 PetscErrorCode MatDestroy_MPIDense(Mat mat)
527 {
528   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
529   PetscErrorCode ierr;
530 #if defined(PETSC_HAVE_PLAPACK)
531   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
532 #endif
533 
534   PetscFunctionBegin;
535 
536 #if defined(PETSC_USE_LOG)
537   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
538 #endif
539   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
540   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
541   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
542   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
543 #if defined(PETSC_HAVE_PLAPACK)
544   if (lu->CleanUpPlapack) {
545     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
546     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
547     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
548 
549 
550     ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
551     ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
552     ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
553   }
554 #endif
555 
556   ierr = PetscFree(mdn);CHKERRQ(ierr);
557   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
558   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
559   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
560   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_MPIDense_Binary"
568 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
569 {
570   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
571   PetscErrorCode    ierr;
572   PetscViewerFormat format;
573   int               fd;
574   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
575   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
576   PetscScalar       *work,*v,*vv;
577   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
578   MPI_Status        status;
579 
580   PetscFunctionBegin;
581   if (mdn->size == 1) {
582     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
583   } else {
584     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
585     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
586     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
587 
588     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
589     if (format == PETSC_VIEWER_BINARY_NATIVE) {
590 
591       if (!rank) {
592 	/* store the matrix as a dense matrix */
593 	header[0] = MAT_FILE_COOKIE;
594 	header[1] = mat->rmap.N;
595 	header[2] = N;
596 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
597 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
598 
599 	/* get largest work array needed for transposing array */
600         mmax = mat->rmap.n;
601         for (i=1; i<size; i++) {
602           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
603         }
604 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
605 
606 	/* write out local array, by rows */
607         m    = mat->rmap.n;
608 	v    = a->v;
609         for (j=0; j<N; j++) {
610           for (i=0; i<m; i++) {
611 	    work[j + i*N] = *v++;
612 	  }
613 	}
614 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
615         /* get largest work array to receive messages from other processes, excludes process zero */
616         mmax = 0;
617         for (i=1; i<size; i++) {
618           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
619         }
620 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
621         v = vv;
622         for (k=1; k<size; k++) {
623           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
624           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
625 
626           for (j=0; j<N; j++) {
627             for (i=0; i<m; i++) {
628               work[j + i*N] = *v++;
629 	    }
630 	  }
631 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
632         }
633         ierr = PetscFree(work);CHKERRQ(ierr);
634         ierr = PetscFree(vv);CHKERRQ(ierr);
635       } else {
636         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
637       }
638     }
639   }
640   PetscFunctionReturn(0);
641 }
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
645 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
646 {
647   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
648   PetscErrorCode    ierr;
649   PetscMPIInt       size = mdn->size,rank = mdn->rank;
650   PetscViewerType   vtype;
651   PetscTruth        iascii,isdraw;
652   PetscViewer       sviewer;
653   PetscViewerFormat format;
654 #if defined(PETSC_HAVE_PLAPACK)
655   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
656 #endif
657 
658   PetscFunctionBegin;
659   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
660   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
661   if (iascii) {
662     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
663     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
664     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
665       MatInfo info;
666       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
667       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
668                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
669       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
670       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
671 #if defined(PETSC_HAVE_PLAPACK)
672       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
673       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
674       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
675       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
676       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
677 #endif
678       PetscFunctionReturn(0);
679     } else if (format == PETSC_VIEWER_ASCII_INFO) {
680       PetscFunctionReturn(0);
681     }
682   } else if (isdraw) {
683     PetscDraw  draw;
684     PetscTruth isnull;
685 
686     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
687     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
688     if (isnull) PetscFunctionReturn(0);
689   }
690 
691   if (size == 1) {
692     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
693   } else {
694     /* assemble the entire matrix onto first processor. */
695     Mat         A;
696     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
697     PetscInt    *cols;
698     PetscScalar *vals;
699 
700     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
701     if (!rank) {
702       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
703     } else {
704       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
705     }
706     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
707     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
708     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
709     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
710 
711     /* Copy the matrix ... This isn't the most efficient means,
712        but it's quick for now */
713     A->insertmode = INSERT_VALUES;
714     row = mat->rmap.rstart; m = mdn->A->rmap.n;
715     for (i=0; i<m; i++) {
716       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
717       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
718       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       row++;
720     }
721 
722     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
723     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
724     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
727     }
728     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
729     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
730     ierr = MatDestroy(A);CHKERRQ(ierr);
731   }
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatView_MPIDense"
737 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
738 {
739   PetscErrorCode ierr;
740   PetscTruth     iascii,isbinary,isdraw,issocket;
741 
742   PetscFunctionBegin;
743 
744   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
745   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
746   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
747   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
748 
749   if (iascii || issocket || isdraw) {
750     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
751   } else if (isbinary) {
752     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
753   } else {
754     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatGetInfo_MPIDense"
761 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
762 {
763   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
764   Mat            mdn = mat->A;
765   PetscErrorCode ierr;
766   PetscReal      isend[5],irecv[5];
767 
768   PetscFunctionBegin;
769   info->rows_global    = (double)A->rmap.N;
770   info->columns_global = (double)A->cmap.N;
771   info->rows_local     = (double)A->rmap.n;
772   info->columns_local  = (double)A->cmap.N;
773   info->block_size     = 1.0;
774   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
775   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
776   isend[3] = info->memory;  isend[4] = info->mallocs;
777   if (flag == MAT_LOCAL) {
778     info->nz_used      = isend[0];
779     info->nz_allocated = isend[1];
780     info->nz_unneeded  = isend[2];
781     info->memory       = isend[3];
782     info->mallocs      = isend[4];
783   } else if (flag == MAT_GLOBAL_MAX) {
784     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
785     info->nz_used      = irecv[0];
786     info->nz_allocated = irecv[1];
787     info->nz_unneeded  = irecv[2];
788     info->memory       = irecv[3];
789     info->mallocs      = irecv[4];
790   } else if (flag == MAT_GLOBAL_SUM) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   }
798   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
799   info->fill_ratio_needed = 0;
800   info->factor_mallocs    = 0;
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSetOption_MPIDense"
806 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
807 {
808   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
809   PetscErrorCode ierr;
810 
811   PetscFunctionBegin;
812   switch (op) {
813   case MAT_NEW_NONZERO_LOCATIONS:
814   case MAT_NEW_NONZERO_LOCATION_ERR:
815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
817     break;
818   case MAT_ROW_ORIENTED:
819     a->roworiented = flg;
820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
821     break;
822   case MAT_NEW_DIAGONALS:
823   case MAT_USE_HASH_TABLE:
824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
825     break;
826   case MAT_IGNORE_OFF_PROC_ENTRIES:
827     a->donotstash = flg;
828     break;
829   case MAT_SYMMETRIC:
830   case MAT_STRUCTURALLY_SYMMETRIC:
831   case MAT_HERMITIAN:
832   case MAT_SYMMETRY_ETERNAL:
833   case MAT_IGNORE_LOWER_TRIANGULAR:
834     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
835     break;
836   default:
837     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
838   }
839   PetscFunctionReturn(0);
840 }
841 
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "MatDiagonalScale_MPIDense"
845 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
846 {
847   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
848   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
849   PetscScalar    *l,*r,x,*v;
850   PetscErrorCode ierr;
851   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
852 
853   PetscFunctionBegin;
854   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
855   if (ll) {
856     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
857     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
858     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
859     for (i=0; i<m; i++) {
860       x = l[i];
861       v = mat->v + i;
862       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
863     }
864     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
865     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
866   }
867   if (rr) {
868     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
869     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
870     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
873     for (i=0; i<n; i++) {
874       x = r[i];
875       v = mat->v + i*m;
876       for (j=0; j<m; j++) { (*v++) *= x;}
877     }
878     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
879     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
880   }
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatNorm_MPIDense"
886 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
887 {
888   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
889   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
890   PetscErrorCode ierr;
891   PetscInt       i,j;
892   PetscReal      sum = 0.0;
893   PetscScalar    *v = mat->v;
894 
895   PetscFunctionBegin;
896   if (mdn->size == 1) {
897     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
898   } else {
899     if (type == NORM_FROBENIUS) {
900       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
901 #if defined(PETSC_USE_COMPLEX)
902         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
903 #else
904         sum += (*v)*(*v); v++;
905 #endif
906       }
907       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
908       *nrm = sqrt(*nrm);
909       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
910     } else if (type == NORM_1) {
911       PetscReal *tmp,*tmp2;
912       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
913       tmp2 = tmp + A->cmap.N;
914       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
915       *nrm = 0.0;
916       v = mat->v;
917       for (j=0; j<mdn->A->cmap.n; j++) {
918         for (i=0; i<mdn->A->rmap.n; i++) {
919           tmp[j] += PetscAbsScalar(*v);  v++;
920         }
921       }
922       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
923       for (j=0; j<A->cmap.N; j++) {
924         if (tmp2[j] > *nrm) *nrm = tmp2[j];
925       }
926       ierr = PetscFree(tmp);CHKERRQ(ierr);
927       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
928     } else if (type == NORM_INFINITY) { /* max row norm */
929       PetscReal ntemp;
930       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
931       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
932     } else {
933       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
934     }
935   }
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatTranspose_MPIDense"
941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
942 {
943   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
944   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
945   Mat            B;
946   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
947   PetscErrorCode ierr;
948   PetscInt       j,i;
949   PetscScalar    *v;
950 
951   PetscFunctionBegin;
952   if (A == *matout && M != N) {
953     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
954   }
955   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
956     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
957     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
959     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
960   } else {
961     B = *matout;
962   }
963 
964   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
965   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
966   for (i=0; i<m; i++) rwork[i] = rstart + i;
967   for (j=0; j<n; j++) {
968     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
969     v   += m;
970   }
971   ierr = PetscFree(rwork);CHKERRQ(ierr);
972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   if (*matout != A) {
975     *matout = B;
976   } else {
977     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #include "petscblaslapack.h"
983 #undef __FUNCT__
984 #define __FUNCT__ "MatScale_MPIDense"
985 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
986 {
987   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
988   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
989   PetscScalar    oalpha = alpha;
990   PetscErrorCode ierr;
991   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
992 
993   PetscFunctionBegin;
994   BLASscal_(&nz,&oalpha,a->v,&one);
995   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1003 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1014 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1015 {
1016   PetscErrorCode ierr;
1017   PetscInt       m=A->rmap.n,n=B->cmap.n;
1018   Mat            Cmat;
1019 
1020   PetscFunctionBegin;
1021   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1022   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1023   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1024   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1025   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1026   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1027   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1028   *C = Cmat;
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #if defined(PETSC_HAVE_PLAPACK)
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatSolve_MPIDense"
1035 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1036 {
1037   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1038   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1039   PetscErrorCode ierr;
1040   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1041   PetscScalar    *array;
1042   PetscReal      one = 1.0;
1043   PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;;
1044   PLA_Obj        v_pla = NULL;
1045   PetscScalar    *loc_buf;
1046   Vec            loc_x;
1047 
1048   PetscFunctionBegin;
1049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1050 
1051   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1052   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1053   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1054 
1055   /* Copy b into rhs_pla */
1056   ierr = PLA_API_begin();CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1058   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1059   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1060   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1061   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1062   ierr = PLA_API_end();CHKERRQ(ierr);
1063 
1064   if (A->factor == FACTOR_LU){
1065     /* Apply the permutations to the right hand sides */
1066     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1067 
1068     /* Solve L y = b, overwriting b with y */
1069     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1070 
1071     /* Solve U x = y (=b), overwriting b with x */
1072     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1073   } else { /* FACTOR_CHOLESKY */
1074     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1075     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1076   }
1077 
1078   /* Copy PLAPACK x into Petsc vector x  */
1079   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1080   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1081   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1082   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1083   if (!lu->pla_solved){
1084 
1085     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1086     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1087     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1088 
1089     /* Create IS and cts for VecScatterring */
1090     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1091     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1092     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1093     idx_petsc = idx_pla + loc_m;
1094 
1095     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1096     for (i=0; i<loc_m; i+=lu->nb){
1097       j = 0;
1098       while (j < lu->nb && i+j < loc_m){
1099         idx_petsc[i+j] = rstart + j; j++;
1100       }
1101       rstart += size*lu->nb;
1102     }
1103 
1104     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1105 
1106     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1107     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1108     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1109     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1110   }
1111   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113 
1114   /* Free data */
1115   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1116   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1117 
1118   lu->pla_solved = PETSC_TRUE;
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1124 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1125 {
1126   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1127   PetscErrorCode ierr;
1128   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1129   PetscInt       info_pla=0;
1130   PetscScalar    *array,one = 1.0;
1131 
1132   PetscFunctionBegin;
1133   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1134 
1135   /* Copy A into lu->A */
1136   ierr = PLA_API_begin();CHKERRQ(ierr);
1137   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1139   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1140   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1141   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1142   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1143   ierr = PLA_API_end();CHKERRQ(ierr);
1144 
1145   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1146   info_pla = PLA_LU(lu->A,lu->pivots);
1147   if (info_pla != 0)
1148     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1149 
1150   lu->CleanUpPlapack = PETSC_TRUE;
1151   lu->rstart         = rstart;
1152 
1153   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1159 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1160 {
1161   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1162   PetscErrorCode ierr;
1163   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1164   PetscInt       info_pla=0;
1165   PetscScalar    *array,one = 1.0;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Copy A into lu->A */
1170   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1171   ierr = PLA_API_begin();CHKERRQ(ierr);
1172   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1173   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1174   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1175   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1176   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1177   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1178   ierr = PLA_API_end();CHKERRQ(ierr);
1179 
1180   /* Factor P A -> Chol */
1181   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1182   if (info_pla != 0)
1183     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1184 
1185   lu->CleanUpPlapack = PETSC_TRUE;
1186   lu->rstart         = rstart;
1187 
1188   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1194 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1195 {
1196   Mat            B;
1197   Mat_Plapack    *lu;
1198   PetscErrorCode ierr;
1199   PetscInt       M=A->rmap.N,N=A->cmap.N;
1200   MPI_Comm       comm=((PetscObject)A)->comm;
1201   PetscMPIInt    size;
1202   PetscInt       ierror;
1203 
1204   PetscFunctionBegin;
1205   /* Create the factorization matrix */
1206   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1207   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
1208   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1209 
1210   lu = (Mat_Plapack*)(B->spptr);
1211 
1212   /* Set default Plapack parameters */
1213   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1214   ierror = 0;
1215   lu->nb     = M/size;
1216   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1217 
1218   /* Set runtime options */
1219   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1220   ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
1221   ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
1222 
1223   ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1224   ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
1225   if (ierror){
1226     ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
1227   } else {
1228     ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
1229   }
1230   lu->ierror = ierror;
1231 
1232   lu->nb_alg = 0;
1233   ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
1234   if (lu->nb_alg){
1235     ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
1236   }
1237   PetscOptionsEnd();
1238 
1239 
1240   /* Create object distribution template */
1241   lu->templ = NULL;
1242   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1243 
1244   /* Use suggested nb_alg if it is not provided by user */
1245   if (lu->nb_alg == 0){
1246     ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr);
1247     ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
1248   }
1249 
1250   /* Set the datatype */
1251 #if defined(PETSC_USE_COMPLEX)
1252   lu->datatype = MPI_DOUBLE_COMPLEX;
1253 #else
1254   lu->datatype = MPI_DOUBLE;
1255 #endif
1256 
1257   ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1258 
1259   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() is called yet */
1260   lu->CleanUpPlapack = PETSC_TRUE;
1261   *F                 = B;
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 /* Note the Petsc r and c permutations are ignored */
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1268 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1269 {
1270   PetscErrorCode ierr;
1271   PetscInt       M = A->rmap.N;
1272   Mat_Plapack    *lu;
1273 
1274   PetscFunctionBegin;
1275   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1276   lu = (Mat_Plapack*)(*F)->spptr;
1277   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1278   (*F)->factor = FACTOR_LU;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 /* Note the Petsc perm permutation is ignored */
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1285 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1286 {
1287   PetscErrorCode ierr;
1288   PetscTruth     issymmetric,set;
1289 
1290   PetscFunctionBegin;
1291   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1292   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1293   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1294   (*F)->factor = FACTOR_CHOLESKY;
1295   PetscFunctionReturn(0);
1296 }
1297 #endif
1298 
1299 /* -------------------------------------------------------------------*/
1300 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1301        MatGetRow_MPIDense,
1302        MatRestoreRow_MPIDense,
1303        MatMult_MPIDense,
1304 /* 4*/ MatMultAdd_MPIDense,
1305        MatMultTranspose_MPIDense,
1306        MatMultTransposeAdd_MPIDense,
1307 #if defined(PETSC_HAVE_PLAPACK)
1308        MatSolve_MPIDense,
1309 #else
1310        0,
1311 #endif
1312        0,
1313        0,
1314 /*10*/ 0,
1315        0,
1316        0,
1317        0,
1318        MatTranspose_MPIDense,
1319 /*15*/ MatGetInfo_MPIDense,
1320        MatEqual_MPIDense,
1321        MatGetDiagonal_MPIDense,
1322        MatDiagonalScale_MPIDense,
1323        MatNorm_MPIDense,
1324 /*20*/ MatAssemblyBegin_MPIDense,
1325        MatAssemblyEnd_MPIDense,
1326        0,
1327        MatSetOption_MPIDense,
1328        MatZeroEntries_MPIDense,
1329 /*25*/ MatZeroRows_MPIDense,
1330 #if defined(PETSC_HAVE_PLAPACK)
1331        MatLUFactorSymbolic_MPIDense,
1332        MatLUFactorNumeric_MPIDense,
1333        MatCholeskyFactorSymbolic_MPIDense,
1334        MatCholeskyFactorNumeric_MPIDense,
1335 #else
1336        0,
1337        0,
1338        0,
1339        0,
1340 #endif
1341 /*30*/ MatSetUpPreallocation_MPIDense,
1342        0,
1343        0,
1344        MatGetArray_MPIDense,
1345        MatRestoreArray_MPIDense,
1346 /*35*/ MatDuplicate_MPIDense,
1347        0,
1348        0,
1349        0,
1350        0,
1351 /*40*/ 0,
1352        MatGetSubMatrices_MPIDense,
1353        0,
1354        MatGetValues_MPIDense,
1355        0,
1356 /*45*/ 0,
1357        MatScale_MPIDense,
1358        0,
1359        0,
1360        0,
1361 /*50*/ 0,
1362        0,
1363        0,
1364        0,
1365        0,
1366 /*55*/ 0,
1367        0,
1368        0,
1369        0,
1370        0,
1371 /*60*/ MatGetSubMatrix_MPIDense,
1372        MatDestroy_MPIDense,
1373        MatView_MPIDense,
1374        0,
1375        0,
1376 /*65*/ 0,
1377        0,
1378        0,
1379        0,
1380        0,
1381 /*70*/ 0,
1382        0,
1383        0,
1384        0,
1385        0,
1386 /*75*/ 0,
1387        0,
1388        0,
1389        0,
1390        0,
1391 /*80*/ 0,
1392        0,
1393        0,
1394        0,
1395 /*84*/ MatLoad_MPIDense,
1396        0,
1397        0,
1398        0,
1399        0,
1400        0,
1401 /*90*/ 0,
1402        MatMatMultSymbolic_MPIDense_MPIDense,
1403        0,
1404        0,
1405        0,
1406 /*95*/ 0,
1407        0,
1408        0,
1409        0};
1410 
1411 EXTERN_C_BEGIN
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1414 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1415 {
1416   Mat_MPIDense   *a;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   mat->preallocated = PETSC_TRUE;
1421   /* Note:  For now, when data is specified above, this assumes the user correctly
1422    allocates the local dense storage space.  We should add error checking. */
1423 
1424   a    = (Mat_MPIDense*)mat->data;
1425   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1426   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1427   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1428   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1429   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 EXTERN_C_END
1433 
1434 EXTERN_C_BEGIN
1435 #if defined(PETSC_HAVE_PLAPACK)
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1438 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1439 {
1440   PetscFunctionBegin;
1441   /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */
1442   PetscFunctionReturn(0);
1443 }
1444 #endif
1445 EXTERN_C_END
1446 
1447 /*MC
1448    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1449 
1450    Options Database Keys:
1451 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1452 
1453   Level: beginner
1454 
1455   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR)
1456   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
1457   (run config/configure.py with the option --download-plapack)
1458 
1459 
1460   Options Database Keys:
1461 . -mat_plapack_nprows <n> - number of rows in processor partition
1462 . -mat_plapack_npcols <n> - number of columns in processor partition
1463 . -mat_plapack_nb <n> - block size of template vector
1464 . -mat_plapack_nb_alg <n> - algorithmic block size
1465 - -mat_plapack_ckerror <n> - error checking flag
1466 
1467 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1468 M*/
1469 
1470 EXTERN_C_BEGIN
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatCreate_MPIDense"
1473 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1474 {
1475   Mat_MPIDense   *a;
1476   PetscErrorCode ierr;
1477 #if defined(PETSC_HAVE_PLAPACK)
1478   Mat_Plapack    *lu;
1479 #endif
1480 
1481   PetscFunctionBegin;
1482   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1483   mat->data         = (void*)a;
1484   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1485   mat->factor       = 0;
1486   mat->mapping      = 0;
1487 
1488   mat->insertmode = NOT_SET_VALUES;
1489   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1490   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1491 
1492   mat->rmap.bs = mat->cmap.bs = 1;
1493   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1494   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1495   a->nvec = mat->cmap.n;
1496 
1497   /* build cache for off array entries formed */
1498   a->donotstash = PETSC_FALSE;
1499   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1500 
1501   /* stuff used for matrix vector multiply */
1502   a->lvec        = 0;
1503   a->Mvctx       = 0;
1504   a->roworiented = PETSC_TRUE;
1505 
1506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1507                                      "MatGetDiagonalBlock_MPIDense",
1508                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1510                                      "MatMPIDenseSetPreallocation_MPIDense",
1511                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1513                                      "MatMatMult_MPIAIJ_MPIDense",
1514                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1516                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1517                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1519                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1520                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1521 #if defined(PETSC_HAVE_PLAPACK)
1522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1523                                      "MatSetSolverType_MPIDense_PLAPACK",
1524                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1525 #endif
1526   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1527 
1528 #if defined(PETSC_HAVE_PLAPACK)
1529   ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
1530   lu->CleanUpPlapack       = PETSC_FALSE;
1531   mat->spptr                 = (void*)lu;
1532 #endif
1533   PetscFunctionReturn(0);
1534 }
1535 EXTERN_C_END
1536 
1537 /*MC
1538    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1539 
1540    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1541    and MATMPIDENSE otherwise.
1542 
1543    Options Database Keys:
1544 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1545 
1546   Level: beginner
1547 
1548 
1549 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1550 M*/
1551 
1552 EXTERN_C_BEGIN
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatCreate_Dense"
1555 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1556 {
1557   PetscErrorCode ierr;
1558   PetscMPIInt    size;
1559 
1560   PetscFunctionBegin;
1561   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1562   if (size == 1) {
1563     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1564   } else {
1565     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 EXTERN_C_END
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1573 /*@C
1574    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1575 
1576    Not collective
1577 
1578    Input Parameters:
1579 .  A - the matrix
1580 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1581    to control all matrix memory allocation.
1582 
1583    Notes:
1584    The dense format is fully compatible with standard Fortran 77
1585    storage by columns.
1586 
1587    The data input variable is intended primarily for Fortran programmers
1588    who wish to allocate their own matrix memory space.  Most users should
1589    set data=PETSC_NULL.
1590 
1591    Level: intermediate
1592 
1593 .keywords: matrix,dense, parallel
1594 
1595 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1596 @*/
1597 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1598 {
1599   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1600 
1601   PetscFunctionBegin;
1602   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1603   if (f) {
1604     ierr = (*f)(mat,data);CHKERRQ(ierr);
1605   }
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatCreateMPIDense"
1611 /*@C
1612    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1613 
1614    Collective on MPI_Comm
1615 
1616    Input Parameters:
1617 +  comm - MPI communicator
1618 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1619 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1620 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1621 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1622 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1623    to control all matrix memory allocation.
1624 
1625    Output Parameter:
1626 .  A - the matrix
1627 
1628    Notes:
1629    The dense format is fully compatible with standard Fortran 77
1630    storage by columns.
1631 
1632    The data input variable is intended primarily for Fortran programmers
1633    who wish to allocate their own matrix memory space.  Most users should
1634    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1635 
1636    The user MUST specify either the local or global matrix dimensions
1637    (possibly both).
1638 
1639    Level: intermediate
1640 
1641 .keywords: matrix,dense, parallel
1642 
1643 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1644 @*/
1645 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1646 {
1647   PetscErrorCode ierr;
1648   PetscMPIInt    size;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1652   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1654   if (size > 1) {
1655     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1656     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1657   } else {
1658     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1659     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatDuplicate_MPIDense"
1666 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1667 {
1668   Mat            mat;
1669   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1670   PetscErrorCode ierr;
1671 
1672   PetscFunctionBegin;
1673   *newmat       = 0;
1674   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1675   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1676   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1677   a                 = (Mat_MPIDense*)mat->data;
1678   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1679   mat->factor       = A->factor;
1680   mat->assembled    = PETSC_TRUE;
1681   mat->preallocated = PETSC_TRUE;
1682 
1683   mat->rmap.rstart     = A->rmap.rstart;
1684   mat->rmap.rend       = A->rmap.rend;
1685   a->size              = oldmat->size;
1686   a->rank              = oldmat->rank;
1687   mat->insertmode      = NOT_SET_VALUES;
1688   a->nvec              = oldmat->nvec;
1689   a->donotstash        = oldmat->donotstash;
1690 
1691   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1692   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1693   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1694 
1695   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1696   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1697   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1698 
1699 #if defined(PETSC_HAVE_PLAPACK)
1700   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1701 #endif
1702   *newmat = mat;
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #include "petscsys.h"
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1710 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1711 {
1712   PetscErrorCode ierr;
1713   PetscMPIInt    rank,size;
1714   PetscInt       *rowners,i,m,nz,j;
1715   PetscScalar    *array,*vals,*vals_ptr;
1716   MPI_Status     status;
1717 
1718   PetscFunctionBegin;
1719   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1720   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1721 
1722   /* determine ownership of all rows */
1723   m          = M/size + ((M % size) > rank);
1724   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1725   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1726   rowners[0] = 0;
1727   for (i=2; i<=size; i++) {
1728     rowners[i] += rowners[i-1];
1729   }
1730 
1731   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1732   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1733   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1734   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1735   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1736 
1737   if (!rank) {
1738     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1739 
1740     /* read in my part of the matrix numerical values  */
1741     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1742 
1743     /* insert into matrix-by row (this is why cannot directly read into array */
1744     vals_ptr = vals;
1745     for (i=0; i<m; i++) {
1746       for (j=0; j<N; j++) {
1747         array[i + j*m] = *vals_ptr++;
1748       }
1749     }
1750 
1751     /* read in other processors and ship out */
1752     for (i=1; i<size; i++) {
1753       nz   = (rowners[i+1] - rowners[i])*N;
1754       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1755       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1756     }
1757   } else {
1758     /* receive numeric values */
1759     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1760 
1761     /* receive message of values*/
1762     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1763 
1764     /* insert into matrix-by row (this is why cannot directly read into array */
1765     vals_ptr = vals;
1766     for (i=0; i<m; i++) {
1767       for (j=0; j<N; j++) {
1768         array[i + j*m] = *vals_ptr++;
1769       }
1770     }
1771   }
1772   ierr = PetscFree(rowners);CHKERRQ(ierr);
1773   ierr = PetscFree(vals);CHKERRQ(ierr);
1774   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1775   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatLoad_MPIDense"
1781 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1782 {
1783   Mat            A;
1784   PetscScalar    *vals,*svals;
1785   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1786   MPI_Status     status;
1787   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1788   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1789   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1790   PetscInt       i,nz,j,rstart,rend;
1791   int            fd;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1796   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1797   if (!rank) {
1798     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1799     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1800     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1801   }
1802 
1803   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1804   M = header[1]; N = header[2]; nz = header[3];
1805 
1806   /*
1807        Handle case where matrix is stored on disk as a dense matrix
1808   */
1809   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1810     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1811     PetscFunctionReturn(0);
1812   }
1813 
1814   /* determine ownership of all rows */
1815   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1816   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1817   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1818   rowners[0] = 0;
1819   for (i=2; i<=size; i++) {
1820     rowners[i] += rowners[i-1];
1821   }
1822   rstart = rowners[rank];
1823   rend   = rowners[rank+1];
1824 
1825   /* distribute row lengths to all processors */
1826   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1827   offlens = ourlens + (rend-rstart);
1828   if (!rank) {
1829     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1830     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1831     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1832     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1833     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1834     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1835   } else {
1836     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1837   }
1838 
1839   if (!rank) {
1840     /* calculate the number of nonzeros on each processor */
1841     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1842     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1843     for (i=0; i<size; i++) {
1844       for (j=rowners[i]; j< rowners[i+1]; j++) {
1845         procsnz[i] += rowlengths[j];
1846       }
1847     }
1848     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1849 
1850     /* determine max buffer needed and allocate it */
1851     maxnz = 0;
1852     for (i=0; i<size; i++) {
1853       maxnz = PetscMax(maxnz,procsnz[i]);
1854     }
1855     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1856 
1857     /* read in my part of the matrix column indices  */
1858     nz = procsnz[0];
1859     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1860     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1861 
1862     /* read in every one elses and ship off */
1863     for (i=1; i<size; i++) {
1864       nz   = procsnz[i];
1865       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1866       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1867     }
1868     ierr = PetscFree(cols);CHKERRQ(ierr);
1869   } else {
1870     /* determine buffer space needed for message */
1871     nz = 0;
1872     for (i=0; i<m; i++) {
1873       nz += ourlens[i];
1874     }
1875     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1876 
1877     /* receive message of column indices*/
1878     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1879     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1880     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1881   }
1882 
1883   /* loop over local rows, determining number of off diagonal entries */
1884   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1885   jj = 0;
1886   for (i=0; i<m; i++) {
1887     for (j=0; j<ourlens[i]; j++) {
1888       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1889       jj++;
1890     }
1891   }
1892 
1893   /* create our matrix */
1894   for (i=0; i<m; i++) {
1895     ourlens[i] -= offlens[i];
1896   }
1897   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1898   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1899   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1900   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1901   A = *newmat;
1902   for (i=0; i<m; i++) {
1903     ourlens[i] += offlens[i];
1904   }
1905 
1906   if (!rank) {
1907     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1908 
1909     /* read in my part of the matrix numerical values  */
1910     nz = procsnz[0];
1911     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1912 
1913     /* insert into matrix */
1914     jj      = rstart;
1915     smycols = mycols;
1916     svals   = vals;
1917     for (i=0; i<m; i++) {
1918       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1919       smycols += ourlens[i];
1920       svals   += ourlens[i];
1921       jj++;
1922     }
1923 
1924     /* read in other processors and ship out */
1925     for (i=1; i<size; i++) {
1926       nz   = procsnz[i];
1927       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1928       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1929     }
1930     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1931   } else {
1932     /* receive numeric values */
1933     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1934 
1935     /* receive message of values*/
1936     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1937     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1938     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1939 
1940     /* insert into matrix */
1941     jj      = rstart;
1942     smycols = mycols;
1943     svals   = vals;
1944     for (i=0; i<m; i++) {
1945       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1946       smycols += ourlens[i];
1947       svals   += ourlens[i];
1948       jj++;
1949     }
1950   }
1951   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1952   ierr = PetscFree(vals);CHKERRQ(ierr);
1953   ierr = PetscFree(mycols);CHKERRQ(ierr);
1954   ierr = PetscFree(rowners);CHKERRQ(ierr);
1955 
1956   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatEqual_MPIDense"
1963 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1964 {
1965   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1966   Mat            a,b;
1967   PetscTruth     flg;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   a = matA->A;
1972   b = matB->A;
1973   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1974   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #if defined(PETSC_HAVE_PLAPACK)
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
1982 /*@C
1983   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
1984   Level: developer
1985 
1986 .keywords: Petsc, destroy, package, PLAPACK
1987 .seealso: PetscFinalize()
1988 @*/
1989 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
1990 {
1991   PetscErrorCode ierr;
1992 
1993   PetscFunctionBegin;
1994   ierr = PLA_Finalize();CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2000 /*@C
2001   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2002   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2003   when using static libraries.
2004 
2005   Input Parameter:
2006   path - The dynamic library path, or PETSC_NULL
2007 
2008   Level: developer
2009 
2010 .keywords: Petsc, initialize, package, PLAPACK
2011 .seealso: PetscInitializePackage(), PetscInitialize()
2012 @*/
2013 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2014 {
2015   MPI_Comm       comm = PETSC_COMM_WORLD;
2016   PetscMPIInt    size,nb_alg;
2017   PetscErrorCode ierr;
2018   PetscTruth     ierror;
2019 
2020   PetscFunctionBegin;
2021   if (!PLA_Initialized(PETSC_NULL)) {
2022 
2023     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2024     Plapack_nprows = 1;
2025     Plapack_npcols = size;
2026 
2027     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2028       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2029       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2030       ierr = PetscOptionsTruth("-plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
2031       if (ierror){
2032 	ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2033       } else {
2034 	ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2035       }
2036 
2037       nb_alg = 0;
2038       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",nb_alg,&nb_alg,PETSC_NULL);CHKERRQ(ierr);
2039       if (nb_alg) {
2040         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,nb_alg);CHKERRQ(ierr);
2041       }
2042     PetscOptionsEnd();
2043 
2044     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2045     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2046     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 
2052 #endif
2053