1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that stores the operator's local entries. For a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscTruth     flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) {
34     *B = mat->A;
35   } else {
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
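/*
   A minimal usage sketch (not part of the original source): extract the local sequential
   block of a parallel dense matrix and query its dimensions.  The names A, Aloc, mloc and
   nloc are illustrative only.

      Mat            A,Aloc;
      PetscInt       mloc,nloc;
      PetscErrorCode ierr;

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetSize(Aloc,&mloc,&nloc);CHKERRQ(ierr);
*/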
40 
41 #undef __FUNCT__
42 #define __FUNCT__ "MatGetRow_MPIDense"
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "MatRestoreRow_MPIDense"
58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
59 {
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
64   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
65   PetscFunctionReturn(0);
66 }
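/*
   A hedged usage sketch (not in the original source): MatGetRow()/MatRestoreRow() on a
   MATMPIDENSE matrix only accept locally owned rows, so a typical loop is bounded by
   MatGetOwnershipRange().  All variable names are illustrative.

      Mat               A;
      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscErrorCode    ierr;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/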
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
72 {
73   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
74   PetscErrorCode ierr;
75   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
76   PetscScalar    *array;
77   MPI_Comm       comm;
78 
79   PetscFunctionBegin;
80   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
81 
82   /* The reuse aspect is not implemented efficiently */
83   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
84 
85   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
86   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
87   ierr = MatCreate(comm,B);CHKERRQ(ierr);
88   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
89   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
90   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
91   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
93   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
94 
95   *iscopy = PETSC_TRUE;
96   PetscFunctionReturn(0);
97 }
98 EXTERN_C_END
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "MatSetValues_MPIDense"
102 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
103 {
104   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
105   PetscErrorCode ierr;
106   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
107   PetscTruth     roworiented = A->roworiented;
108 
109   PetscFunctionBegin;
110   for (i=0; i<m; i++) {
111     if (idxm[i] < 0) continue;
112     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
113     if (idxm[i] >= rstart && idxm[i] < rend) {
114       row = idxm[i] - rstart;
115       if (roworiented) {
116         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
117       } else {
118         for (j=0; j<n; j++) {
119           if (idxn[j] < 0) continue;
120           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
121           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
122         }
123       }
124     } else {
125       if (!A->donotstash) {
126         if (roworiented) {
127           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
128         } else {
129           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
130         }
131       }
132     }
133   }
134   PetscFunctionReturn(0);
135 }
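/*
   A minimal sketch (not in the original source) of how the routine above is driven from
   user code: entries destined for rows owned by other processes are stashed here and only
   communicated during assembly.  The names A, i, j and v are illustrative global indices
   and a value.

      PetscInt       i = 0,j = 0;
      PetscScalar    v = 1.0;
      PetscErrorCode ierr;

      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/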
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatGetValues_MPIDense"
139 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
140 {
141   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
142   PetscErrorCode ierr;
143   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
144 
145   PetscFunctionBegin;
146   for (i=0; i<m; i++) {
147     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
148     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
149     if (idxm[i] >= rstart && idxm[i] < rend) {
150       row = idxm[i] - rstart;
151       for (j=0; j<n; j++) {
152         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
153         if (idxn[j] >= mat->cmap.N) {
154           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
155         }
156         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
157       }
158     } else {
159       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
160     }
161   }
162   PetscFunctionReturn(0);
163 }
164 
165 #undef __FUNCT__
166 #define __FUNCT__ "MatGetArray_MPIDense"
167 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 #undef __FUNCT__
178 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
179 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
180 {
181   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
182   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
183   PetscErrorCode ierr;
184   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
185   PetscScalar    *av,*bv,*v = lmat->v;
186   Mat            newmat;
187 
188   PetscFunctionBegin;
189   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
190   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
191   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
192   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
193 
194   /* No parallel redistribution currently supported! Should really check each index set
195      to confirm that it is OK.  ... Currently the submatrix must have the same partitioning as the
196      original matrix! */
197 
198   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
199   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
200 
201   /* Check submatrix call */
202   if (scall == MAT_REUSE_MATRIX) {
203     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
204     /* Really need to test rows and column sizes! */
205     newmat = *B;
206   } else {
207     /* Create and fill new matrix */
208     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
209     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
210     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
211     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
212   }
213 
214   /* Now extract the data pointers and do the copy, column at a time */
215   newmatd = (Mat_MPIDense*)newmat->data;
216   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
217 
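  /* The local dense block is stored column-major with leading dimension nlrows, so column
     icol[i] of the original matrix begins at v + nlrows*icol[i]; the selected rows of that
     column are copied into consecutive locations of the new matrix's storage. */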
218   for (i=0; i<ncols; i++) {
219     av = v + nlrows*icol[i];
220     for (j=0; j<nrows; j++) {
221       *bv++ = av[irow[j] - rstart];
222     }
223   }
224 
225   /* Assemble the matrices so that the correct flags are set */
226   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
227   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
228 
229   /* Free work space */
230   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
231   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
232   *B = newmat;
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatRestoreArray_MPIDense"
238 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
239 {
240   PetscFunctionBegin;
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
246 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
247 {
248   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
249   MPI_Comm       comm = ((PetscObject)mat)->comm;
250   PetscErrorCode ierr;
251   PetscInt       nstash,reallocs;
252   InsertMode     addv;
253 
254   PetscFunctionBegin;
255   /* make sure all processors are either in INSERTMODE or ADDMODE */
256   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
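  /* Assuming the standard InsertMode values (NOT_SET_VALUES=0, INSERT_VALUES=1, ADD_VALUES=2),
     the bitwise OR across all ranks equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks
     inserted values while others added them. */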
257   if (addv == (ADD_VALUES|INSERT_VALUES)) {
258     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
259   }
260   mat->insertmode = addv; /* in case this processor had no cache */
261 
262   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
263   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
264   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
270 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
271 {
272   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
273   PetscErrorCode  ierr;
274   PetscInt        i,*row,*col,flg,j,rstart,ncols;
275   PetscMPIInt     n;
276   PetscScalar     *val;
277   InsertMode      addv=mat->insertmode;
278 
279   PetscFunctionBegin;
280   /*  wait on receives */
281   while (1) {
282     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
283     if (!flg) break;
284 
285     for (i=0; i<n;) {
286       /* Now identify the consecutive vals belonging to the same row */
287       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
288       if (j < n) ncols = j-i;
289       else       ncols = n-i;
290       /* Now assemble all these values with a single function call */
291       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
292       i = j;
293     }
294   }
295   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
296 
297   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
298   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
299 
300   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
301     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
302   }
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatZeroEntries_MPIDense"
308 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
309 {
310   PetscErrorCode ierr;
311   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
312 
313   PetscFunctionBegin;
314   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 /* The code does not handle the diagonal entries correctly unless the
319    matrix is square and the column and row ownerships are identical.
320    This is a BUG. The only way to fix it seems to be to access
321    the local matrix mdn->A directly and not go through the MatZeroRows()
322    routine.
323 */
324 #undef __FUNCT__
325 #define __FUNCT__ "MatZeroRows_MPIDense"
326 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
327 {
328   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
329   PetscErrorCode ierr;
330   PetscInt       i,*owners = A->rmap.range;
331   PetscInt       *nprocs,j,idx,nsends;
332   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
333   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
334   PetscInt       *lens,*lrows,*values;
335   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
336   MPI_Comm       comm = ((PetscObject)A)->comm;
337   MPI_Request    *send_waits,*recv_waits;
338   MPI_Status     recv_status,*send_status;
339   PetscTruth     found;
340 
341   PetscFunctionBegin;
342   /*  first count number of contributors to each processor */
343   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
344   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
345   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
346   for (i=0; i<N; i++) {
347     idx = rows[i];
348     found = PETSC_FALSE;
349     for (j=0; j<size; j++) {
350       if (idx >= owners[j] && idx < owners[j+1]) {
351         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
352       }
353     }
354     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
355   }
356   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
357 
358   /* inform other processors of number of messages and max length*/
359   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
360 
361   /* post receives:   */
362   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
363   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
364   for (i=0; i<nrecvs; i++) {
365     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
366   }
367 
368   /* do sends:
369       1) starts[i] gives the starting index in svalues for stuff going to
370          the ith processor
371   */
372   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
373   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
374   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
375   starts[0]  = 0;
376   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
377   for (i=0; i<N; i++) {
378     svalues[starts[owner[i]]++] = rows[i];
379   }
380 
381   starts[0] = 0;
382   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   count = 0;
384   for (i=0; i<size; i++) {
385     if (nprocs[2*i+1]) {
386       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
387     }
388   }
389   ierr = PetscFree(starts);CHKERRQ(ierr);
390 
391   base = owners[rank];
392 
393   /*  wait on receives */
394   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
395   source = lens + nrecvs;
396   count  = nrecvs; slen = 0;
397   while (count) {
398     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
399     /* unpack receives into our local space */
400     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
401     source[imdex]  = recv_status.MPI_SOURCE;
402     lens[imdex]    = n;
403     slen += n;
404     count--;
405   }
406   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
407 
408   /* move the data into the send scatter */
409   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
410   count = 0;
411   for (i=0; i<nrecvs; i++) {
412     values = rvalues + i*nmax;
413     for (j=0; j<lens[i]; j++) {
414       lrows[count++] = values[j] - base;
415     }
416   }
417   ierr = PetscFree(rvalues);CHKERRQ(ierr);
418   ierr = PetscFree(lens);CHKERRQ(ierr);
419   ierr = PetscFree(owner);CHKERRQ(ierr);
420   ierr = PetscFree(nprocs);CHKERRQ(ierr);
421 
422   /* actually zap the local rows */
423   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
424   ierr = PetscFree(lrows);CHKERRQ(ierr);
425 
426   /* wait on sends */
427   if (nsends) {
428     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
429     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
430     ierr = PetscFree(send_status);CHKERRQ(ierr);
431   }
432   ierr = PetscFree(send_waits);CHKERRQ(ierr);
433   ierr = PetscFree(svalues);CHKERRQ(ierr);
434 
435   PetscFunctionReturn(0);
436 }
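/*
   A hedged usage sketch (not in the original source): each process passes the global indices
   of the rows it wants zeroed and the communication above routes every index to the owning
   process.  The names A, rows and diag are illustrative.

      PetscInt       rows[2] = {0,1};
      PetscScalar    diag    = 1.0;
      PetscErrorCode ierr;

      ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/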
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatMult_MPIDense"
440 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
441 {
442   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
443   PetscErrorCode ierr;
444 
445   PetscFunctionBegin;
446   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
447   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
448   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
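/*
   A minimal sketch (not in the original source) of a conforming multiply: x must have global
   length mat->cmap.N and y global length mat->rmap.N, with local lengths matching the matrix
   layouts.  The names A, comm, x, y, m and n are illustrative.

      Vec            x,y;
      PetscInt       m,n;
      PetscErrorCode ierr;

      ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
      ierr = VecCreateMPI(comm,n,PETSC_DETERMINE,&x);CHKERRQ(ierr);
      ierr = VecCreateMPI(comm,m,PETSC_DETERMINE,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/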
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatMultAdd_MPIDense"
454 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
455 {
456   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
457   PetscErrorCode ierr;
458 
459   PetscFunctionBegin;
460   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
461   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
462   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatMultTranspose_MPIDense"
468 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
469 {
470   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
471   PetscErrorCode ierr;
472   PetscScalar    zero = 0.0;
473 
474   PetscFunctionBegin;
475   ierr = VecSet(yy,zero);CHKERRQ(ierr);
476   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
477   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
478   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
484 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
491   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
492   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
493   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatGetDiagonal_MPIDense"
499 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
500 {
501   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
503   PetscErrorCode ierr;
504   PetscInt       len,i,n,m = A->rmap.n,radd;
505   PetscScalar    *x,zero = 0.0;
506 
507   PetscFunctionBegin;
508   ierr = VecSet(v,zero);CHKERRQ(ierr);
509   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
510   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
511   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
512   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
513   radd = A->rmap.rstart*m;
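  /* aloc->v is the local m x N block stored column-major, so the i-th local diagonal entry
     (global row rstart+i, global column rstart+i) sits at offset rstart*m + i*m + i, i.e.
     radd + i*m + i. */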
514   for (i=0; i<len; i++) {
515     x[i] = aloc->v[radd + i*m + i];
516   }
517   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
518   PetscFunctionReturn(0);
519 }
520 
521 #undef __FUNCT__
522 #define __FUNCT__ "MatDestroy_MPIDense"
523 PetscErrorCode MatDestroy_MPIDense(Mat mat)
524 {
525   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
526   PetscErrorCode ierr;
527 #if defined(PETSC_HAVE_PLAPACK)
528   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
529 #endif
530 
531   PetscFunctionBegin;
532 
533 #if defined(PETSC_USE_LOG)
534   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
535 #endif
536   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
537   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
538   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
539   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
540 #if defined(PETSC_HAVE_PLAPACK)
541   if (lu->CleanUpPlapack) {
542     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
543     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
544     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
545 
546 
547     ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
548     ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
549     ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
550   }
551 #endif
552 
553   ierr = PetscFree(mdn);CHKERRQ(ierr);
554   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
555   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
556   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
557   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
558   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
559   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatView_MPIDense_Binary"
565 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
566 {
567   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
568   PetscErrorCode    ierr;
569   PetscViewerFormat format;
570   int               fd;
571   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
572   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
573   PetscScalar       *work,*v,*vv;
574   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
575   MPI_Status        status;
576 
577   PetscFunctionBegin;
578   if (mdn->size == 1) {
579     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
580   } else {
581     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
582     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
583     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
584 
585     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
586     if (format == PETSC_VIEWER_BINARY_NATIVE) {
587 
588       if (!rank) {
589 	/* store the matrix as a dense matrix */
590 	header[0] = MAT_FILE_COOKIE;
591 	header[1] = mat->rmap.N;
592 	header[2] = N;
593 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
594 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
595 
596 	/* get largest work array needed for transposing array */
597         mmax = mat->rmap.n;
598         for (i=1; i<size; i++) {
599           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
600         }
601 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
602 
603 	/* write out local array, by rows */
604         m    = mat->rmap.n;
605 	v    = a->v;
606         for (j=0; j<N; j++) {
607           for (i=0; i<m; i++) {
608 	    work[j + i*N] = *v++;
609 	  }
610 	}
611 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
612         /* get largest work array to receive messages from other processes, excludes process zero */
613         mmax = 0;
614         for (i=1; i<size; i++) {
615           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
616         }
617 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
618         for (k=1; k<size; k++) {
619           v    = vv;  /* restart at the beginning of the receive buffer for each sending process */
620           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
621           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
622 
623           for (j=0; j<N; j++) {
624             for (i=0; i<m; i++) {
625               work[j + i*N] = *v++;
626 	    }
627 	  }
628 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
629         }
630         ierr = PetscFree(work);CHKERRQ(ierr);
631         ierr = PetscFree(vv);CHKERRQ(ierr);
632       } else {
633         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
634       }
635     }
636   }
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
643 {
644   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
645   PetscErrorCode    ierr;
646   PetscMPIInt       size = mdn->size,rank = mdn->rank;
647   PetscViewerType   vtype;
648   PetscTruth        iascii,isdraw;
649   PetscViewer       sviewer;
650   PetscViewerFormat format;
651 #if defined(PETSC_HAVE_PLAPACK)
652   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
653 #endif
654 
655   PetscFunctionBegin;
656   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
657   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
658   if (iascii) {
659     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
660     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
661     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
662       MatInfo info;
663       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
664       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
665                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
666       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
667       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
668 #if defined(PETSC_HAVE_PLAPACK)
669       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
670       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->nprows, lu->npcols);CHKERRQ(ierr);
671       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
672       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
673       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
674 #endif
675       PetscFunctionReturn(0);
676     } else if (format == PETSC_VIEWER_ASCII_INFO) {
677       PetscFunctionReturn(0);
678     }
679   } else if (isdraw) {
680     PetscDraw  draw;
681     PetscTruth isnull;
682 
683     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
684     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
685     if (isnull) PetscFunctionReturn(0);
686   }
687 
688   if (size == 1) {
689     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
690   } else {
691     /* assemble the entire matrix onto first processor. */
692     Mat         A;
693     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
694     PetscInt    *cols;
695     PetscScalar *vals;
696 
697     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
698     if (!rank) {
699       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
700     } else {
701       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
702     }
703     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
704     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
705     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
706     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
707 
708     /* Copy the matrix ... This isn't the most efficient means,
709        but it's quick for now */
710     A->insertmode = INSERT_VALUES;
711     row = mat->rmap.rstart; m = mdn->A->rmap.n;
712     for (i=0; i<m; i++) {
713       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
714       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
715       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
716       row++;
717     }
718 
719     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
720     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
721     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
722     if (!rank) {
723       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
724     }
725     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
726     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
727     ierr = MatDestroy(A);CHKERRQ(ierr);
728   }
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatView_MPIDense"
734 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
735 {
736   PetscErrorCode ierr;
737   PetscTruth     iascii,isbinary,isdraw,issocket;
738 
739   PetscFunctionBegin;
740 
741   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
742   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
743   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
744   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
745 
746   if (iascii || issocket || isdraw) {
747     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
748   } else if (isbinary) {
749     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
750   } else {
751     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatGetInfo_MPIDense"
758 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
759 {
760   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
761   Mat            mdn = mat->A;
762   PetscErrorCode ierr;
763   PetscReal      isend[5],irecv[5];
764 
765   PetscFunctionBegin;
766   info->rows_global    = (double)A->rmap.N;
767   info->columns_global = (double)A->cmap.N;
768   info->rows_local     = (double)A->rmap.n;
769   info->columns_local  = (double)A->cmap.N;
770   info->block_size     = 1.0;
771   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
772   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
773   isend[3] = info->memory;  isend[4] = info->mallocs;
774   if (flag == MAT_LOCAL) {
775     info->nz_used      = isend[0];
776     info->nz_allocated = isend[1];
777     info->nz_unneeded  = isend[2];
778     info->memory       = isend[3];
779     info->mallocs      = isend[4];
780   } else if (flag == MAT_GLOBAL_MAX) {
781     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
782     info->nz_used      = irecv[0];
783     info->nz_allocated = irecv[1];
784     info->nz_unneeded  = irecv[2];
785     info->memory       = irecv[3];
786     info->mallocs      = irecv[4];
787   } else if (flag == MAT_GLOBAL_SUM) {
788     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
789     info->nz_used      = irecv[0];
790     info->nz_allocated = irecv[1];
791     info->nz_unneeded  = irecv[2];
792     info->memory       = irecv[3];
793     info->mallocs      = irecv[4];
794   }
795   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
796   info->fill_ratio_needed = 0;
797   info->factor_mallocs    = 0;
798   PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatSetOption_MPIDense"
803 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
804 {
805   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
806   PetscErrorCode ierr;
807 
808   PetscFunctionBegin;
809   switch (op) {
810   case MAT_NEW_NONZERO_LOCATIONS:
811   case MAT_NEW_NONZERO_LOCATION_ERR:
812   case MAT_NEW_NONZERO_ALLOCATION_ERR:
813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
814     break;
815   case MAT_ROW_ORIENTED:
816     a->roworiented = flg;
817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
818     break;
819   case MAT_NEW_DIAGONALS:
820   case MAT_USE_HASH_TABLE:
821     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
822     break;
823   case MAT_IGNORE_OFF_PROC_ENTRIES:
824     a->donotstash = flg;
825     break;
826   case MAT_SYMMETRIC:
827   case MAT_STRUCTURALLY_SYMMETRIC:
828   case MAT_HERMITIAN:
829   case MAT_SYMMETRY_ETERNAL:
830   case MAT_IGNORE_LOWER_TRIANGULAR:
831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
832     break;
833   default:
834     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatDiagonalScale_MPIDense"
842 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
843 {
844   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
845   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
846   PetscScalar    *l,*r,x,*v;
847   PetscErrorCode ierr;
848   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
849 
850   PetscFunctionBegin;
851   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
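  /* Left scaling (ll) multiplies each local row i by l[i] and needs no communication; right
     scaling (rr) multiplies whole columns, and because the local block stores all cmap.N
     columns the distributed rr is first gathered into the full-length work vector lvec
     through the Mvctx scatter. */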
852   if (ll) {
853     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
854     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
855     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
856     for (i=0; i<m; i++) {
857       x = l[i];
858       v = mat->v + i;
859       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
860     }
861     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
862     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
863   }
864   if (rr) {
865     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
866     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
867     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
870     for (i=0; i<n; i++) {
871       x = r[i];
872       v = mat->v + i*m;
873       for (j=0; j<m; j++) { (*v++) *= x;}
874     }
875     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
876     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
877   }
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNCT__
882 #define __FUNCT__ "MatNorm_MPIDense"
883 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
884 {
885   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
886   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
887   PetscErrorCode ierr;
888   PetscInt       i,j;
889   PetscReal      sum = 0.0;
890   PetscScalar    *v = mat->v;
891 
892   PetscFunctionBegin;
893   if (mdn->size == 1) {
894     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
895   } else {
896     if (type == NORM_FROBENIUS) {
897       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
898 #if defined(PETSC_USE_COMPLEX)
899         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
900 #else
901         sum += (*v)*(*v); v++;
902 #endif
903       }
904       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
905       *nrm = sqrt(*nrm);
906       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
907     } else if (type == NORM_1) {
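      /* 1-norm: each process sums |a_ij| over its local rows for every global column, the
         per-column sums are added across processes with MPI_Allreduce, and the result is the
         maximum of the reduced column sums. */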
908       PetscReal *tmp,*tmp2;
909       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
910       tmp2 = tmp + A->cmap.N;
911       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
912       *nrm = 0.0;
913       v = mat->v;
914       for (j=0; j<mdn->A->cmap.n; j++) {
915         for (i=0; i<mdn->A->rmap.n; i++) {
916           tmp[j] += PetscAbsScalar(*v);  v++;
917         }
918       }
919       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
920       for (j=0; j<A->cmap.N; j++) {
921         if (tmp2[j] > *nrm) *nrm = tmp2[j];
922       }
923       ierr = PetscFree(tmp);CHKERRQ(ierr);
924       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
925     } else if (type == NORM_INFINITY) { /* max row norm */
926       PetscReal ntemp;
927       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
928       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
929     } else {
930       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
931     }
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatTranspose_MPIDense"
938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
939 {
940   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
941   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
942   Mat            B;
943   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
944   PetscErrorCode ierr;
945   PetscInt       j,i;
946   PetscScalar    *v;
947 
948   PetscFunctionBegin;
949   if (A == *matout && M != N) {
950     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
951   }
952   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
953     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
954     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
955     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
956     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
957   } else {
958     B = *matout;
959   }
960 
961   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
962   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
963   for (i=0; i<m; i++) rwork[i] = rstart + i;
964   for (j=0; j<n; j++) {
965     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
966     v   += m;
967   }
968   ierr = PetscFree(rwork);CHKERRQ(ierr);
969   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
970   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971   if (*matout != A) {
972     *matout = B;
973   } else {
974     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #include "petscblaslapack.h"
980 #undef __FUNCT__
981 #define __FUNCT__ "MatScale_MPIDense"
982 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
983 {
984   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
985   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
986   PetscScalar    oalpha = alpha;
987   PetscErrorCode ierr;
988   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
989 
990   PetscFunctionBegin;
991   BLASscal_(&nz,&oalpha,a->v,&one);
992   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
997 
998 #undef __FUNCT__
999 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1000 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1001 {
1002   PetscErrorCode ierr;
1003 
1004   PetscFunctionBegin;
1005   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1011 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1012 {
1013   PetscErrorCode ierr;
1014   PetscInt       m=A->rmap.n,n=B->cmap.n;
1015   Mat            Cmat;
1016 
1017   PetscFunctionBegin;
1018   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %D != B->rmap.n %D",A->cmap.n,B->rmap.n);
1019   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1020   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1021   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1022   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1023   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1024   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1025   *C = Cmat;
1026   PetscFunctionReturn(0);
1027 }
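/*
   A hedged usage sketch (not in the original source): the routine above only allocates and
   assembles the product matrix; the numeric product itself is not computed here.  C gets the
   row layout of A and the column layout of B.  The names A, B and C are illustrative, and A
   and B are assumed to be conforming MATMPIDENSE matrices.

      Mat            A,B,C;
      PetscErrorCode ierr;

      ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,1.0,&C);CHKERRQ(ierr);
*/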
1028 
1029 #if defined(PETSC_HAVE_PLAPACK)
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSolve_MPIDense"
1032 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1033 {
1034   MPI_Comm       comm = ((PetscObject)A)->comm;
1035   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1036   PetscErrorCode ierr;
1037   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1038   PetscScalar    *array;
1039   PetscReal      one = 1.0;
1040   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1041   PLA_Obj        v_pla = NULL;
1042   PetscScalar    *loc_buf;
1043   Vec            loc_x;
1044 
1045   PetscFunctionBegin;
1046   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1047   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1048 
1049   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1050   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1051   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1052 
1053   /* Copy b into rhs_pla */
1054   ierr = PLA_API_begin();CHKERRQ(ierr);
1055   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1056   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1057   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1058   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1059   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1060   ierr = PLA_API_end();CHKERRQ(ierr);
1061 
1062   if (A->factor == FACTOR_LU){
1063     /* Apply the permutations to the right hand sides */
1064     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1065 
1066     /* Solve L y = b, overwriting b with y */
1067     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1068 
1069     /* Solve U x = y (=b), overwriting b with x */
1070     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1071   } else { /* FACTOR_CHOLESKY */
1072     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1073     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1074   }
1075 
1076   /* Copy PLAPACK x into Petsc vector x  */
1077   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1078   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1079   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1080   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1081   if (!lu->pla_solved){
1082 
1083     ierr = PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);CHKERRQ(ierr);
1084     ierr = PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);CHKERRQ(ierr);
1085     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1086 
1087     /* Create index sets and a scatter context for the VecScatter */
1088     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1089     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1090     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1091     idx_petsc = idx_pla + loc_m;
1092 
1093     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1094     for (i=0; i<loc_m; i+=lu->nb){
1095       j = 0;
1096       while (j < lu->nb && i+j < loc_m){
1097         idx_petsc[i+j] = rstart + j; j++;
1098       }
1099       rstart += size*lu->nb;
1100     }
1101 
1102     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1103 
1104     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1105     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1106     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1107     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1108   }
1109   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111 
1112   /* Free data */
1113   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1114   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1115 
1116   lu->pla_solved = PETSC_TRUE;
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1122 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1123 {
1124   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1125   PetscErrorCode ierr;
1126   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1127   PetscInt       info_pla=0;
1128   PetscScalar    *array,one = 1.0;
1129 
1130   PetscFunctionBegin;
1131   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1132 
1133   /* Copy A into lu->A */
1134   ierr = PLA_API_begin();CHKERRQ(ierr);
1135   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1137   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1138   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1139   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1140   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1141   ierr = PLA_API_end();CHKERRQ(ierr);
1142 
1143   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1144   info_pla = PLA_LU(lu->A,lu->pivots);
1145   if (info_pla != 0)
1146     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1147 
1148   lu->CleanUpPlapack = PETSC_TRUE;
1149   lu->rstart         = rstart;
1150 
1151   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1157 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1158 {
1159   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1160   PetscErrorCode ierr;
1161   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1162   PetscInt       info_pla=0;
1163   PetscScalar    *array,one = 1.0;
1164 
1165   PetscFunctionBegin;
1166 
1167   /* Copy A into lu->A */
1168   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1169   ierr = PLA_API_begin();CHKERRQ(ierr);
1170   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1171   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1172   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1173   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1174   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1175   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1176   ierr = PLA_API_end();CHKERRQ(ierr);
1177 
1178   /* Factor P A -> Chol */
1179   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1180   if (info_pla != 0)
1181     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1182 
1183   lu->CleanUpPlapack = PETSC_TRUE;
1184   lu->rstart         = rstart;
1185 
1186   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1192 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1193 {
1194   Mat            B;
1195   Mat_Plapack    *lu;
1196   PetscErrorCode ierr;
1197   PetscInt       M=A->rmap.N,N=A->cmap.N;
1198   MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
1199   PetscMPIInt    size;
1200   PetscInt       ierror;
1201 
1202   PetscFunctionBegin;
1203   /* Create the factorization matrix */
1204   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1205   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
1206   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1207 
1208   lu = (Mat_Plapack*)(B->spptr);
1209 
1210   /* Set default Plapack parameters */
1211   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1212   lu->nprows = 1; lu->npcols = size;
1213   ierror = 0;
1214   lu->nb     = M/size;
1215   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1216 
1217   /* Set runtime options */
1218   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1219   ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
1220   ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);
1221 
1222   ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1223   ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
1224   if (ierror){
1225     ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
1226   } else {
1227     ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
1228   }
1229   lu->ierror = ierror;
1230 
1231   lu->nb_alg = 0;
1232   ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
1233   if (lu->nb_alg){
1234     ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
1235   }
1236   PetscOptionsEnd();
1237 
1238 
1239   /* Create a 2D communicator */
1240   ierr = PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);CHKERRQ(ierr);
1241   lu->comm_2d = comm_2d;
1242 
1243   /* Create object distribution template */
1244   lu->templ = NULL;
1245   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1246 
1247   /* Use suggested nb_alg if it is not provided by user */
1248   if (lu->nb_alg == 0){
1249     ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr);
1250     ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
1251   }
1252 
1253   /* Set the datatype */
1254 #if defined(PETSC_USE_COMPLEX)
1255   lu->datatype = MPI_DOUBLE_COMPLEX;
1256 #else
1257   lu->datatype = MPI_DOUBLE;
1258 #endif
1259 
1260   ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1261 
1262   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1263   lu->CleanUpPlapack = PETSC_TRUE;
1264   *F                 = B;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 /* Note the Petsc r and c permutations are ignored */
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1271 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1272 {
1273   PetscErrorCode ierr;
1274   PetscInt       M = A->rmap.N;
1275   Mat_Plapack    *lu;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1279   lu = (Mat_Plapack*)(*F)->spptr;
1280   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1281   (*F)->factor = FACTOR_LU;
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 /* Note the Petsc perm permutation is ignored */
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1288 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1289 {
1290   PetscErrorCode ierr;
1291   PetscTruth     issymmetric,set;
1292 
1293   PetscFunctionBegin;
1294   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1295   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1296   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1297   (*F)->factor = FACTOR_CHOLESKY;
1298   PetscFunctionReturn(0);
1299 }
1300 #endif
1301 
1302 /* -------------------------------------------------------------------*/
1303 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1304        MatGetRow_MPIDense,
1305        MatRestoreRow_MPIDense,
1306        MatMult_MPIDense,
1307 /* 4*/ MatMultAdd_MPIDense,
1308        MatMultTranspose_MPIDense,
1309        MatMultTransposeAdd_MPIDense,
1310 #if defined(PETSC_HAVE_PLAPACK)
1311        MatSolve_MPIDense,
1312 #else
1313        0,
1314 #endif
1315        0,
1316        0,
1317 /*10*/ 0,
1318        0,
1319        0,
1320        0,
1321        MatTranspose_MPIDense,
1322 /*15*/ MatGetInfo_MPIDense,
1323        MatEqual_MPIDense,
1324        MatGetDiagonal_MPIDense,
1325        MatDiagonalScale_MPIDense,
1326        MatNorm_MPIDense,
1327 /*20*/ MatAssemblyBegin_MPIDense,
1328        MatAssemblyEnd_MPIDense,
1329        0,
1330        MatSetOption_MPIDense,
1331        MatZeroEntries_MPIDense,
1332 /*25*/ MatZeroRows_MPIDense,
1333 #if defined(PETSC_HAVE_PLAPACK)
1334        MatLUFactorSymbolic_MPIDense,
1335        MatLUFactorNumeric_MPIDense,
1336        MatCholeskyFactorSymbolic_MPIDense,
1337        MatCholeskyFactorNumeric_MPIDense,
1338 #else
1339        0,
1340        0,
1341        0,
1342        0,
1343 #endif
1344 /*30*/ MatSetUpPreallocation_MPIDense,
1345        0,
1346        0,
1347        MatGetArray_MPIDense,
1348        MatRestoreArray_MPIDense,
1349 /*35*/ MatDuplicate_MPIDense,
1350        0,
1351        0,
1352        0,
1353        0,
1354 /*40*/ 0,
1355        MatGetSubMatrices_MPIDense,
1356        0,
1357        MatGetValues_MPIDense,
1358        0,
1359 /*45*/ 0,
1360        MatScale_MPIDense,
1361        0,
1362        0,
1363        0,
1364 /*50*/ 0,
1365        0,
1366        0,
1367        0,
1368        0,
1369 /*55*/ 0,
1370        0,
1371        0,
1372        0,
1373        0,
1374 /*60*/ MatGetSubMatrix_MPIDense,
1375        MatDestroy_MPIDense,
1376        MatView_MPIDense,
1377        0,
1378        0,
1379 /*65*/ 0,
1380        0,
1381        0,
1382        0,
1383        0,
1384 /*70*/ 0,
1385        0,
1386        0,
1387        0,
1388        0,
1389 /*75*/ 0,
1390        0,
1391        0,
1392        0,
1393        0,
1394 /*80*/ 0,
1395        0,
1396        0,
1397        0,
1398 /*84*/ MatLoad_MPIDense,
1399        0,
1400        0,
1401        0,
1402        0,
1403        0,
1404 /*90*/ 0,
1405        MatMatMultSymbolic_MPIDense_MPIDense,
1406        0,
1407        0,
1408        0,
1409 /*95*/ 0,
1410        0,
1411        0,
1412        0};
1413 
1414 EXTERN_C_BEGIN
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1417 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1418 {
1419   Mat_MPIDense   *a;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   mat->preallocated = PETSC_TRUE;
1424   /* Note: for now, when the data argument is non-null we assume the user has correctly
1425    allocated the local dense storage space.  Error checking should be added. */
1426 
1427   a    = (Mat_MPIDense*)mat->data;
1428   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1429   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1430   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1431   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1432   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 EXTERN_C_END
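/*
   A minimal creation sketch (not part of the original source): the routine above is normally
   reached through the MatMPIDenseSetPreallocation() interface after the type has been set.
   Passing PETSC_NULL lets PETSc allocate the local column-major storage.  The names A, comm,
   M and N are illustrative.

      Mat            A;
      PetscInt       M = 100,N = 100;
      PetscErrorCode ierr;

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/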
1436 
1437 EXTERN_C_BEGIN
1438 #if defined(PETSC_HAVE_PLAPACK)
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1441 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1442 {
1443   PetscFunctionBegin;
1444   /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") silently works (since PLAPACK is used by default when it is available) */
1445   PetscFunctionReturn(0);
1446 }
1447 #endif
1448 EXTERN_C_END
1449 
1450 /*MC
1451    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1452 
1453    Options Database Keys:
1454 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1455 
1456   Level: beginner
1457 
1458   MATMPIDENSE matrices may use the parallel direct solvers (LU, Cholesky, and QR)
1459   provided by the external package PLAPACK, if PLAPACK is installed
1460   (run config/configure.py with the option --download-plapack).
1461 
1462 
1463   Options Database Keys:
1464 . -mat_plapack_nprows <n> - number of rows in processor partition
1465 . -mat_plapack_npcols <n> - number of columns in processor partition
1466 . -mat_plapack_nb <n> - block size of template vector
1467 . -mat_plapack_nb_alg <n> - algorithmic block size
1468 - -mat_plapack_ckerror <n> - error checking flag
1469 
1470 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1471 M*/
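/*
   A minimal usage sketch (added for illustration, not part of the original source): creating a
   MATMPIDENSE matrix and letting PETSc allocate the local storage.  The 100x100 global size and
   the variable name A are arbitrary choices for the example.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Values can be inserted with MatSetValues() between preallocation and assembly; with
   -mat_type mpidense the MatSetType() call can be replaced by MatSetFromOptions().
*/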
1472 
1473 EXTERN_C_BEGIN
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatCreate_MPIDense"
1476 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1477 {
1478   Mat_MPIDense   *a;
1479   PetscErrorCode ierr;
1480 #if defined(PETSC_HAVE_PLAPACK)
1481   Mat_Plapack    *lu;
1482 #endif
1483 
1484   PetscFunctionBegin;
1485   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1486   mat->data         = (void*)a;
1487   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1488   mat->factor       = 0;
1489   mat->mapping      = 0;
1490 
1491   mat->insertmode = NOT_SET_VALUES;
1492   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1493   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1494 
1495   mat->rmap.bs = mat->cmap.bs = 1;
1496   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1497   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1498   a->nvec = mat->cmap.n;
1499 
1500   /* build cache for off array entries formed */
1501   a->donotstash = PETSC_FALSE;
1502   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1503 
1504   /* stuff used for matrix vector multiply */
1505   a->lvec        = 0;
1506   a->Mvctx       = 0;
1507   a->roworiented = PETSC_TRUE;
1508 
1509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1510                                      "MatGetDiagonalBlock_MPIDense",
1511                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1513                                      "MatMPIDenseSetPreallocation_MPIDense",
1514                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1516                                      "MatMatMult_MPIAIJ_MPIDense",
1517                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1519                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1520                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1522                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1523                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1524 #if defined(PETSC_HAVE_PLAPACK)
1525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1526                                      "MatSetSolverType_MPIDense_PLAPACK",
1527                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1528 #endif
1529   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1530 
1531 #if defined(PETSC_HAVE_PLAPACK)
1532   ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
1533   lu->CleanUpPlapack = PETSC_FALSE;
1534   mat->spptr         = (void*)lu;
1535 #endif
1536   PetscFunctionReturn(0);
1537 }
1538 EXTERN_C_END
1539 
1540 /*MC
1541    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1542 
1543    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1544    and MATMPIDENSE otherwise.
1545 
1546    Options Database Keys:
1547 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1548 
1549   Level: beginner
1550 
1551 
1552 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1553 M*/
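/*
   A short sketch (an addition for illustration): with the generic MATDENSE type the concrete
   type is chosen from the communicator size, as MatCreate_Dense() below does.  Here comm, M,
   and N are placeholders supplied by the caller.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);   becomes MATSEQDENSE on one process, MATMPIDENSE otherwise
*/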
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatCreate_Dense"
1558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1559 {
1560   PetscErrorCode ierr;
1561   PetscMPIInt    size;
1562 
1563   PetscFunctionBegin;
1564   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1565   if (size == 1) {
1566     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1567   } else {
1568     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1569   }
1570   PetscFunctionReturn(0);
1571 }
1572 EXTERN_C_END
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1576 /*@C
1577    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1578 
1579    Not collective
1580 
1581    Input Parameters:
1582 +  mat - the matrix
1583 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1584    to control all matrix memory allocation.
1585 
1586    Notes:
1587    The dense format is fully compatible with standard Fortran 77
1588    storage by columns.
1589 
1590    The data input variable is intended primarily for Fortran programmers
1591    who wish to allocate their own matrix memory space.  Most users should
1592    set data=PETSC_NULL.
1593 
1594    Level: intermediate
1595 
1596 .keywords: matrix,dense, parallel
1597 
1598 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1599 @*/
1600 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1601 {
1602   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1603 
1604   PetscFunctionBegin;
1605   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1606   if (f) {
1607     ierr = (*f)(mat,data);CHKERRQ(ierr);
1608   }
1609   PetscFunctionReturn(0);
1610 }
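/*
   A hedged usage sketch for the routine above (mloc and Nglob are hypothetical local/global
   sizes, not names from this file).  Either pass PETSC_NULL and let PETSc allocate the local
   block, or pass a user array of mloc*Nglob scalars stored by columns (Fortran order).

     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   or, with user-provided storage:

     PetscScalar *data;
     ierr = PetscMalloc(mloc*Nglob*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/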
1611 
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MatCreateMPIDense"
1614 /*@C
1615    MatCreateMPIDense - Creates a parallel matrix in dense format.
1616 
1617    Collective on MPI_Comm
1618 
1619    Input Parameters:
1620 +  comm - MPI communicator
1621 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1622 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1623 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1624 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1625 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1626    to control all matrix memory allocation.
1627 
1628    Output Parameter:
1629 .  A - the matrix
1630 
1631    Notes:
1632    The dense format is fully compatible with standard Fortran 77
1633    storage by columns.
1634 
1635    The data input variable is intended primarily for Fortran programmers
1636    who wish to allocate their own matrix memory space.  Most users should
1637    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1638 
1639    The user MUST specify either the local or global matrix dimensions
1640    (possibly both).
1641 
1642    Level: intermediate
1643 
1644 .keywords: matrix,dense, parallel
1645 
1646 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1647 @*/
1648 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1649 {
1650   PetscErrorCode ierr;
1651   PetscMPIInt    size;
1652 
1653   PetscFunctionBegin;
1654   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1655   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1657   if (size > 1) {
1658     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1659     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1660   } else {
1661     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1662     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1663   }
1664   PetscFunctionReturn(0);
1665 }
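/*
   A minimal sketch of the convenience routine above (the 8x8 global size is only illustrative);
   passing data = PETSC_NULL lets PETSc allocate the storage on each process.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,PETSC_NULL,&C);CHKERRQ(ierr);
*/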
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatDuplicate_MPIDense"
1669 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1670 {
1671   Mat            mat;
1672   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   *newmat       = 0;
1677   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1678   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1679   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1680   a                 = (Mat_MPIDense*)mat->data;
1681   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1682   mat->factor       = A->factor;
1683   mat->assembled    = PETSC_TRUE;
1684   mat->preallocated = PETSC_TRUE;
1685 
1686   mat->rmap.rstart     = A->rmap.rstart;
1687   mat->rmap.rend       = A->rmap.rend;
1688   a->size              = oldmat->size;
1689   a->rank              = oldmat->rank;
1690   mat->insertmode      = NOT_SET_VALUES;
1691   a->nvec              = oldmat->nvec;
1692   a->donotstash        = oldmat->donotstash;
1693 
1694   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1695   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1696   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1697 
1698   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1699   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1700   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1701 
1702 #if defined(PETSC_HAVE_PLAPACK)
1703   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1704 #endif
1705   *newmat = mat;
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #include "petscsys.h"
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1713 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1714 {
1715   PetscErrorCode ierr;
1716   PetscMPIInt    rank,size;
1717   PetscInt       *rowners,i,m,nz,j;
1718   PetscScalar    *array,*vals,*vals_ptr;
1719   MPI_Status     status;
1720 
1721   PetscFunctionBegin;
1722   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1723   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1724 
1725   /* determine ownership of all rows */
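  /* Each process receives M/size rows plus one extra row when rank < M % size;
     for example (illustration only) M = 10 on 3 processes gives local sizes 4, 3, 3. */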
1726   m          = M/size + ((M % size) > rank);
1727   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1728   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1729   rowners[0] = 0;
1730   for (i=2; i<=size; i++) {
1731     rowners[i] += rowners[i-1];
1732   }
1733 
1734   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1735   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1736   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1737   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1738   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1739 
1740   if (!rank) {
1741     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1742 
1743     /* read in my part of the matrix numerical values  */
1744     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1745 
1746     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1747     vals_ptr = vals;
1748     for (i=0; i<m; i++) {
1749       for (j=0; j<N; j++) {
1750         array[i + j*m] = *vals_ptr++;
1751       }
1752     }
1753 
1754     /* read in the other processors' parts and ship them out */
1755     for (i=1; i<size; i++) {
1756       nz   = (rowners[i+1] - rowners[i])*N;
1757       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1758       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1759     }
1760   } else {
1761     /* receive numeric values */
1762     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1763 
1764     /* receive the message of values */
1765     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1766 
1767     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1768     vals_ptr = vals;
1769     for (i=0; i<m; i++) {
1770       for (j=0; j<N; j++) {
1771         array[i + j*m] = *vals_ptr++;
1772       }
1773     }
1774   }
1775   ierr = PetscFree(rowners);CHKERRQ(ierr);
1776   ierr = PetscFree(vals);CHKERRQ(ierr);
1777   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1778   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatLoad_MPIDense"
1784 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1785 {
1786   Mat            A;
1787   PetscScalar    *vals,*svals;
1788   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1789   MPI_Status     status;
1790   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1791   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1792   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1793   PetscInt       i,nz,j,rstart,rend;
1794   int            fd;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1799   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1800   if (!rank) {
1801     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1802     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1803     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1804   }
1805 
1806   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1807   M = header[1]; N = header[2]; nz = header[3];
1808 
1809   /*
1810        Handle case where matrix is stored on disk as a dense matrix
1811   */
1812   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1813     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1814     PetscFunctionReturn(0);
1815   }
1816 
1817   /* determine ownership of all rows */
1818   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1819   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1820   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1821   rowners[0] = 0;
1822   for (i=2; i<=size; i++) {
1823     rowners[i] += rowners[i-1];
1824   }
1825   rstart = rowners[rank];
1826   rend   = rowners[rank+1];
1827 
1828   /* distribute row lengths to all processors */
1829   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1830   offlens = ourlens + (rend-rstart);
1831   if (!rank) {
1832     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1833     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1834     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1835     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1836     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1837     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1838   } else {
1839     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1840   }
1841 
1842   if (!rank) {
1843     /* calculate the number of nonzeros on each processor */
1844     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1845     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1846     for (i=0; i<size; i++) {
1847       for (j=rowners[i]; j< rowners[i+1]; j++) {
1848         procsnz[i] += rowlengths[j];
1849       }
1850     }
1851     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1852 
1853     /* determine max buffer needed and allocate it */
1854     maxnz = 0;
1855     for (i=0; i<size; i++) {
1856       maxnz = PetscMax(maxnz,procsnz[i]);
1857     }
1858     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1859 
1860     /* read in my part of the matrix column indices  */
1861     nz = procsnz[0];
1862     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1863     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1864 
1865     /* read in everyone else's parts and ship them off */
1866     for (i=1; i<size; i++) {
1867       nz   = procsnz[i];
1868       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1869       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1870     }
1871     ierr = PetscFree(cols);CHKERRQ(ierr);
1872   } else {
1873     /* determine buffer space needed for message */
1874     nz = 0;
1875     for (i=0; i<m; i++) {
1876       nz += ourlens[i];
1877     }
1878     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1879 
1880     /* receive message of column indices*/
1881     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1882     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1883     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1884   }
1885 
1886   /* loop over local rows, determining number of off diagonal entries */
1887   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1888   jj = 0;
1889   for (i=0; i<m; i++) {
1890     for (j=0; j<ourlens[i]; j++) {
1891       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1892       jj++;
1893     }
1894   }
1895 
1896   /* create our matrix */
1897   for (i=0; i<m; i++) {
1898     ourlens[i] -= offlens[i];
1899   }
1900   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1901   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1902   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1903   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1904   A = *newmat;
1905   for (i=0; i<m; i++) {
1906     ourlens[i] += offlens[i];
1907   }
1908 
1909   if (!rank) {
1910     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1911 
1912     /* read in my part of the matrix numerical values  */
1913     nz = procsnz[0];
1914     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1915 
1916     /* insert into matrix */
1917     jj      = rstart;
1918     smycols = mycols;
1919     svals   = vals;
1920     for (i=0; i<m; i++) {
1921       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1922       smycols += ourlens[i];
1923       svals   += ourlens[i];
1924       jj++;
1925     }
1926 
1927     /* read in the other processors' parts and ship them out */
1928     for (i=1; i<size; i++) {
1929       nz   = procsnz[i];
1930       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1931       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1932     }
1933     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1934   } else {
1935     /* receive numeric values */
1936     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1937 
1938     /* receive the message of values */
1939     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1940     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1941     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1942 
1943     /* insert into matrix */
1944     jj      = rstart;
1945     smycols = mycols;
1946     svals   = vals;
1947     for (i=0; i<m; i++) {
1948       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1949       smycols += ourlens[i];
1950       svals   += ourlens[i];
1951       jj++;
1952     }
1953   }
1954   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1955   ierr = PetscFree(vals);CHKERRQ(ierr);
1956   ierr = PetscFree(mycols);CHKERRQ(ierr);
1957   ierr = PetscFree(rowners);CHKERRQ(ierr);
1958 
1959   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1960   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatEqual_MPIDense"
1966 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1967 {
1968   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1969   Mat            a,b;
1970   PetscTruth     flg;
1971   PetscErrorCode ierr;
1972 
1973   PetscFunctionBegin;
1974   a = matA->A;
1975   b = matB->A;
1976   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1977   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #if defined(PETSC_HAVE_PLAPACK)
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
1985 /*@C
1986   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
1987   Level: developer
1988 
1989 .keywords: Petsc, destroy, package, PLAPACK
1990 .seealso: PetscFinalize()
1991 @*/
1992 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
1993 {
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = PLA_Finalize();CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2003 /*@C
2004   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2005   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2006   when using static libraries.
2007 
2008   Input Parameter:
2009 .  path - The dynamic library path, or PETSC_NULL
2010 
2011   Level: developer
2012 
2013 .keywords: Petsc, initialize, package, PLAPACK
2014 .seealso: PetscInitializePackage(), PetscInitialize()
2015 @*/
2016 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2017 {
2018   MPI_Comm       comm = PETSC_COMM_WORLD,comm_2d;
2019   int            initPLA;
2020   PetscMPIInt    size,nprows,npcols;
2021   PetscErrorCode ierr;
2022 
2023   PetscFunctionBegin;
2024   if (!PLA_Initialized(PETSC_NULL)) {
2025 
2026     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2027     nprows = 1;
2028     npcols = size;
2029 
2030     ierr = PLA_Comm_1D_to_2D(comm,nprows,npcols,&comm_2d);CHKERRQ(ierr);
2031     ierr = PLA_Init(comm_2d);CHKERRQ(ierr);
2032     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 
2038 #endif
2039