xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 9a8c540fe3817a4a8ffc0ba5aadcc32494d11f9d)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.115 1999/06/08 22:55:41 balay Exp balay $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 extern int MatSetUpMultiply_MPIDense(Mat);
13 
14 #undef __FUNC__
15 #define __FUNC__ "MatSetValues_MPIDense"
16 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
17 {
18   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
19   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
20   int          roworiented = A->roworiented;
21 
22   PetscFunctionBegin;
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) continue;
25     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
28       if (roworiented) {
29         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
30       } else {
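        /* column-oriented input (illustrative note): v is logically m by n and stored by columns, so entry (i,j) is v[i + j*m] */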
31         for ( j=0; j<n; j++ ) {
32           if (idxn[j] < 0) continue;
33           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
34           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
35         }
36       }
37     } else {
38       if ( !A->donotstash) {
39         if (roworiented) {
40           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
41         } else {
42           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
43         }
44       }
45     }
46   }
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNC__
51 #define __FUNC__ "MatGetValues_MPIDense"
52 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
53 {
54   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
55   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
56 
57   PetscFunctionBegin;
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
65         if (idxn[j] >= mdn->N) {
66           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
67         }
68         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
69       }
70     } else {
71       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetArray_MPIDense"
79 int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int          ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatRestoreArray_MPIDense"
91 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
92 {
93   PetscFunctionBegin;
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNC__
98 #define __FUNC__ "MatAssemblyBegin_MPIDense"
99 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
100 {
101   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
102   MPI_Comm     comm = mat->comm;
103   int          ierr,nstash,reallocs;
104   InsertMode   addv;
105 
106   PetscFunctionBegin;
107   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
108   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
109   if (addv == (ADD_VALUES|INSERT_VALUES)) {
110     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
111   }
112   mat->insertmode = addv; /* in case this processor had no cache */
113 
114   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
115   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
116   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
117            nstash,reallocs);
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatAssemblyEnd_MPIDense"
123 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
124 {
125   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
126   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
127   Scalar       *val;
128   InsertMode   addv=mat->insertmode;
129 
130   PetscFunctionBegin;
131   /*  wait on receives */
132   while (1) {
133     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
134     if (!flg) break;
135 
136     for ( i=0; i<n; ) {
137       /* Now identify the consecutive vals belonging to the same row */
138       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
139       if (j < n) ncols = j-i;
140       else       ncols = n-i;
141       /* Now assemble all these values with a single function call */
142       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
143       i = j;
144     }
145   }
146   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
147 
148   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
149   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
150 
151   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
152     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
153   }
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNC__
158 #define __FUNC__ "MatZeroEntries_MPIDense"
159 int MatZeroEntries_MPIDense(Mat A)
160 {
161   int          ierr;
162   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
163 
164   PetscFunctionBegin;
165   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNC__
170 #define __FUNC__ "MatGetBlockSize_MPIDense"
171 int MatGetBlockSize_MPIDense(Mat A,int *bs)
172 {
173   PetscFunctionBegin;
174   *bs = 1;
175   PetscFunctionReturn(0);
176 }
177 
178 /* the code does not do the diagonal entries correctly unless the
179    matrix is square and the column and row ownerships are identical.
180    This is a BUG. The only way to fix it seems to be to access
181    mdn->A and mdn->B directly and not through the MatZeroRows()
182    routine.
183 */
184 #undef __FUNC__
185 #define __FUNC__ "MatZeroRows_MPIDense"
186 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
187 {
188   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
189   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
190   int            *procs,*nprocs,j,found,idx,nsends,*work;
191   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
192   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
193   int            *lens,imdex,*lrows,*values;
194   MPI_Comm       comm = A->comm;
195   MPI_Request    *send_waits,*recv_waits;
196   MPI_Status     recv_status,*send_status;
197   IS             istmp;
198 
199   PetscFunctionBegin;
200   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
201   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
202 
203   /*  first count number of contributors to each processor */
204   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
205   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
206   procs  = nprocs + size;
207   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
208   for ( i=0; i<N; i++ ) {
209     idx = rows[i];
210     found = 0;
211     for ( j=0; j<size; j++ ) {
212       if (idx >= owners[j] && idx < owners[j+1]) {
213         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
214       }
215     }
216     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
217   }
218   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
219 
220   /* inform other processors of number of messages and max length*/
221   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
222   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
223   nrecvs = work[rank];
224   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
225   nmax   = work[rank];
226   PetscFree(work);
227 
228   /* post receives:   */
229   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
230   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
231   for ( i=0; i<nrecvs; i++ ) {
232     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
233   }
234 
235   /* do sends:
236       1) starts[i] gives the starting index in svalues for stuff going to
237          the ith processor
238   */
239   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
240   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
241   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
242   starts[0]  = 0;
243   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
244   for ( i=0; i<N; i++ ) {
245     svalues[starts[owner[i]]++] = rows[i];
246   }
247   ISRestoreIndices(is,&rows);
248 
249   starts[0] = 0;
250   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
251   count = 0;
252   for ( i=0; i<size; i++ ) {
253     if (procs[i]) {
254       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
255     }
256   }
257   PetscFree(starts);
258 
259   base = owners[rank];
260 
261   /*  wait on receives */
262   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
263   source = lens + nrecvs;
264   count  = nrecvs; slen = 0;
265   while (count) {
266     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
267     /* unpack receives into our local space */
268     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
269     source[imdex]  = recv_status.MPI_SOURCE;
270     lens[imdex]  = n;
271     slen += n;
272     count--;
273   }
274   PetscFree(recv_waits);
275 
276   /* move the data into the send scatter */
277   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
278   count = 0;
279   for ( i=0; i<nrecvs; i++ ) {
280     values = rvalues + i*nmax;
281     for ( j=0; j<lens[i]; j++ ) {
282       lrows[count++] = values[j] - base;
283     }
284   }
285   PetscFree(rvalues); PetscFree(lens);
286   PetscFree(owner); PetscFree(nprocs);
287 
288   /* actually zap the local rows */
289   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
290   PLogObjectParent(A,istmp);
291   PetscFree(lrows);
292   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
293   ierr = ISDestroy(istmp);CHKERRQ(ierr);
294 
295   /* wait on sends */
296   if (nsends) {
297     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
298     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
299     PetscFree(send_status);
300   }
301   PetscFree(send_waits); PetscFree(svalues);
302 
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNC__
307 #define __FUNC__ "MatMult_MPIDense"
308 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
309 {
310   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
311   int          ierr;
312 
313   PetscFunctionBegin;
314   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
315   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
316   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 #undef __FUNC__
321 #define __FUNC__ "MatMultAdd_MPIDense"
322 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
323 {
324   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
325   int          ierr;
326 
327   PetscFunctionBegin;
328   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
329   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
330   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNC__
335 #define __FUNC__ "MatMultTrans_MPIDense"
336 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
337 {
338   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
339   int          ierr;
340   Scalar       zero = 0.0;
341 
342   PetscFunctionBegin;
343   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
344   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
345   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
346   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNC__
351 #define __FUNC__ "MatMultTransAdd_MPIDense"
352 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
353 {
354   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
355   int          ierr;
356 
357   PetscFunctionBegin;
358   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
359   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
360   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
361   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNC__
366 #define __FUNC__ "MatGetDiagonal_MPIDense"
367 int MatGetDiagonal_MPIDense(Mat A,Vec v)
368 {
369   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
370   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
371   int          ierr, len, i, n, m = a->m, radd;
372   Scalar       *x, zero = 0.0;
373 
374   PetscFunctionBegin;
375   VecSet(&zero,v);
376   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
377   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
378   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
379   len = PetscMin(aloc->m,aloc->n);
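  /* descriptive note: aloc->v is the local m by N block stored column-major (leading dimension m);
     the diagonal entry of local row i sits in global column rstart+i, i.e. at
     offset rstart*m + i*m + i */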
380   radd = a->rstart*m;
381   for ( i=0; i<len; i++ ) {
382     x[i] = aloc->v[radd + i*m + i];
383   }
384   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNC__
389 #define __FUNC__ "MatDestroy_MPIDense"
390 int MatDestroy_MPIDense(Mat mat)
391 {
392   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
393   int          ierr;
394 
395   PetscFunctionBegin;
396   if (--mat->refct > 0) PetscFunctionReturn(0);
397 
398   if (mat->mapping) {
399     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
400   }
401   if (mat->bmapping) {
402     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
403   }
404 #if defined(PETSC_USE_LOG)
405   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
406 #endif
407   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
408   PetscFree(mdn->rowners);
409   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
410   if (mdn->lvec)   VecDestroy(mdn->lvec);
411   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
412   if (mdn->factor) {
413     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
414     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
415     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
416     PetscFree(mdn->factor);
417   }
418   PetscFree(mdn);
419   if (mat->rmap) {
420     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
421   }
422   if (mat->cmap) {
423     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
424   }
425   PLogObjectDestroy(mat);
426   PetscHeaderDestroy(mat);
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNC__
431 #define __FUNC__ "MatView_MPIDense_Binary"
432 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
433 {
434   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
435   int          ierr;
436 
437   PetscFunctionBegin;
438   if (mdn->size == 1) {
439     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
440   }
441   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
442   PetscFunctionReturn(0);
443 }
444 
445 #undef __FUNC__
446 #define __FUNC__ "MatView_MPIDense_ASCII"
447 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
448 {
449   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
450   int          ierr, format, size = mdn->size, rank = mdn->rank;
451   FILE         *fd;
452   ViewerType   vtype;
453 
454   PetscFunctionBegin;
455   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
456   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
457   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
458   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
459     MatInfo info;
460     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
461     PetscSequentialPhaseBegin(mat->comm,1);
462       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
463          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
464       fflush(fd);
465     PetscSequentialPhaseEnd(mat->comm,1);
466     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
467     PetscFunctionReturn(0);
468   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
469     PetscFunctionReturn(0);
470   }
471 
472   if (size == 1) {
473     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
474   } else {
475     /* assemble the entire matrix onto the first processor. */
476     Mat          A;
477     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
478     Scalar       *vals;
479     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
480 
481     if (!rank) {
482       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
483     } else {
484       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
485     }
486     PLogObjectParent(mat,A);
487 
488     /* Copy the matrix ... This isn't the most efficient means,
489        but it's quick for now */
490     row = mdn->rstart; m = Amdn->m;
491     for ( i=0; i<m; i++ ) {
492       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
493       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
494       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
495       row++;
496     }
497 
498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
500     if (!rank) {
501       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
502     }
503     ierr = MatDestroy(A);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNC__
509 #define __FUNC__ "MatView_MPIDense"
510 int MatView_MPIDense(Mat mat,Viewer viewer)
511 {
512   int          ierr;
513   ViewerType   vtype;
514 
515   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
516   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
517     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
518   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
519     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
520   } else {
521     SETERRQ(1,1,"Viewer type not supported by PETSc object");
522   }
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNC__
527 #define __FUNC__ "MatGetInfo_MPIDense"
528 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
529 {
530   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
531   Mat          mdn = mat->A;
532   int          ierr;
533   double       isend[5], irecv[5];
534 
535   PetscFunctionBegin;
536   info->rows_global    = (double)mat->M;
537   info->columns_global = (double)mat->N;
538   info->rows_local     = (double)mat->m;
539   info->columns_local  = (double)mat->N;
540   info->block_size     = 1.0;
541   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
542   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
543   isend[3] = info->memory;  isend[4] = info->mallocs;
544   if (flag == MAT_LOCAL) {
545     info->nz_used      = isend[0];
546     info->nz_allocated = isend[1];
547     info->nz_unneeded  = isend[2];
548     info->memory       = isend[3];
549     info->mallocs      = isend[4];
550   } else if (flag == MAT_GLOBAL_MAX) {
551     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
552     info->nz_used      = irecv[0];
553     info->nz_allocated = irecv[1];
554     info->nz_unneeded  = irecv[2];
555     info->memory       = irecv[3];
556     info->mallocs      = irecv[4];
557   } else if (flag == MAT_GLOBAL_SUM) {
558     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
559     info->nz_used      = irecv[0];
560     info->nz_allocated = irecv[1];
561     info->nz_unneeded  = irecv[2];
562     info->memory       = irecv[3];
563     info->mallocs      = irecv[4];
564   }
565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
566   info->fill_ratio_needed = 0;
567   info->factor_mallocs    = 0;
568   PetscFunctionReturn(0);
569 }
570 
571 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
572    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
573    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
574    extern int MatSolve_MPIDense(Mat,Vec,Vec);
575    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
576    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
577    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
578 
579 #undef __FUNC__
580 #define __FUNC__ "MatSetOption_MPIDense"
581 int MatSetOption_MPIDense(Mat A,MatOption op)
582 {
583   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
584 
585   PetscFunctionBegin;
586   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
587       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
588       op == MAT_NEW_NONZERO_LOCATION_ERR ||
589       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
590       op == MAT_COLUMNS_SORTED ||
591       op == MAT_COLUMNS_UNSORTED) {
592         MatSetOption(a->A,op);
593   } else if (op == MAT_ROW_ORIENTED) {
594         a->roworiented = 1;
595         MatSetOption(a->A,op);
596   } else if (op == MAT_ROWS_SORTED ||
597              op == MAT_ROWS_UNSORTED ||
598              op == MAT_SYMMETRIC ||
599              op == MAT_STRUCTURALLY_SYMMETRIC ||
600              op == MAT_YES_NEW_DIAGONALS ||
601              op == MAT_USE_HASH_TABLE) {
602     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
603   } else if (op == MAT_COLUMN_ORIENTED) {
604     a->roworiented = 0; MatSetOption(a->A,op);
605   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
606     a->donotstash = 1;
607   } else if (op == MAT_NO_NEW_DIAGONALS) {
608     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
609   } else {
610     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 #undef __FUNC__
616 #define __FUNC__ "MatGetSize_MPIDense"
617 int MatGetSize_MPIDense(Mat A,int *m,int *n)
618 {
619   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
620 
621   PetscFunctionBegin;
622   *m = mat->M; *n = mat->N;
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNC__
627 #define __FUNC__ "MatGetLocalSize_MPIDense"
628 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
629 {
630   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
631 
632   PetscFunctionBegin;
633   *m = mat->m; *n = mat->N;
634   PetscFunctionReturn(0);
635 }
636 
637 #undef __FUNC__
638 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
639 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
640 {
641   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
642 
643   PetscFunctionBegin;
644   *m = mat->rstart; *n = mat->rend;
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNC__
649 #define __FUNC__ "MatGetRow_MPIDense"
650 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
651 {
652   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
653   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
654 
655   PetscFunctionBegin;
656   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
657   lrow = row - rstart;
658   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNC__
663 #define __FUNC__ "MatRestoreRow_MPIDense"
664 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
665 {
666   PetscFunctionBegin;
667   if (idx) PetscFree(*idx);
668   if (v) PetscFree(*v);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNC__
673 #define __FUNC__ "MatNorm_MPIDense"
674 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
675 {
676   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
677   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
678   int          ierr, i, j;
679   double       sum = 0.0;
680   Scalar       *v = mat->v;
681 
682   PetscFunctionBegin;
683   if (mdn->size == 1) {
684     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
685   } else {
686     if (type == NORM_FROBENIUS) {
687       for (i=0; i<mat->n*mat->m; i++ ) {
688 #if defined(PETSC_USE_COMPLEX)
689         sum += PetscReal(PetscConj(*v)*(*v)); v++;
690 #else
691         sum += (*v)*(*v); v++;
692 #endif
693       }
694       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
695       *norm = sqrt(*norm);
696       PLogFlops(2*mat->n*mat->m);
697     } else if (type == NORM_1) {
698       double *tmp, *tmp2;
699       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
700       tmp2 = tmp + mdn->N;
701       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
702       *norm = 0.0;
703       v = mat->v;
704       for ( j=0; j<mat->n; j++ ) {
705         for ( i=0; i<mat->m; i++ ) {
706           tmp[j] += PetscAbsScalar(*v);  v++;
707         }
708       }
709       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
710       for ( j=0; j<mdn->N; j++ ) {
711         if (tmp2[j] > *norm) *norm = tmp2[j];
712       }
713       PetscFree(tmp);
714       PLogFlops(mat->n*mat->m);
715     } else if (type == NORM_INFINITY) { /* max row norm */
716       double ntemp;
717       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
718       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
719     } else {
720       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
721     }
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNC__
727 #define __FUNC__ "MatTranspose_MPIDense"
728 int MatTranspose_MPIDense(Mat A,Mat *matout)
729 {
730   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
731   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
732   Mat          B;
733   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
734   int          j, i, ierr;
735   Scalar       *v;
736 
737   PetscFunctionBegin;
738   if (matout == PETSC_NULL && M != N) {
739     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
740   }
741   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
742 
743   m = Aloc->m; n = Aloc->n; v = Aloc->v;
744   rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
745   for ( j=0; j<n; j++ ) {
746     for (i=0; i<m; i++) rwork[i] = rstart + i;
747     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
748     v   += m;
749   }
750   PetscFree(rwork);
751   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
752   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
753   if (matout != PETSC_NULL) {
754     *matout = B;
755   } else {
756     PetscOps *Abops;
757     MatOps   Aops;
758 
759     /* This isn't really an in-place transpose; we simply free the data structures of a */
760     PetscFree(a->rowners);
761     ierr = MatDestroy(a->A);CHKERRQ(ierr);
762     if (a->lvec) VecDestroy(a->lvec);
763     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
764     PetscFree(a);
765 
766     /*
767          This is horrible, horrible code. We need to keep the
768       A pointers for the bops and ops but copy everything
769       else from B.
770     */
771     Abops   = A->bops;
772     Aops    = A->ops;
773     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
774     A->bops = Abops;
775     A->ops  = Aops;
776 
777     PetscHeaderDestroy(B);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #include "pinclude/blaslapack.h"
783 #undef __FUNC__
784 #define __FUNC__ "MatScale_MPIDense"
785 int MatScale_MPIDense(Scalar *alpha,Mat inA)
786 {
787   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
788   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
789   int          one = 1, nz;
790 
791   PetscFunctionBegin;
792   nz = a->m*a->n;
793   BLscal_( &nz, alpha, a->v, &one );
794   PLogFlops(nz);
795   PetscFunctionReturn(0);
796 }
797 
798 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
799 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
800 
801 /* -------------------------------------------------------------------*/
802 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
803        MatGetRow_MPIDense,
804        MatRestoreRow_MPIDense,
805        MatMult_MPIDense,
806        MatMultAdd_MPIDense,
807        MatMultTrans_MPIDense,
808        MatMultTransAdd_MPIDense,
809        0,
810        0,
811        0,
812        0,
813        0,
814        0,
815        0,
816        MatTranspose_MPIDense,
817        MatGetInfo_MPIDense,0,
818        MatGetDiagonal_MPIDense,
819        0,
820        MatNorm_MPIDense,
821        MatAssemblyBegin_MPIDense,
822        MatAssemblyEnd_MPIDense,
823        0,
824        MatSetOption_MPIDense,
825        MatZeroEntries_MPIDense,
826        MatZeroRows_MPIDense,
827        0,
828        0,
829        0,
830        0,
831        MatGetSize_MPIDense,
832        MatGetLocalSize_MPIDense,
833        MatGetOwnershipRange_MPIDense,
834        0,
835        0,
836        MatGetArray_MPIDense,
837        MatRestoreArray_MPIDense,
838        MatDuplicate_MPIDense,
839        0,
840        0,
841        0,
842        0,
843        0,
844        MatGetSubMatrices_MPIDense,
845        0,
846        MatGetValues_MPIDense,
847        0,
848        0,
849        MatScale_MPIDense,
850        0,
851        0,
852        0,
853        MatGetBlockSize_MPIDense,
854        0,
855        0,
856        0,
857        0,
858        0,
859        0,
860        0,
861        0,
862        0,
863        0,
864        0,
865        0,
866        MatGetMaps_Petsc};
867 
868 #undef __FUNC__
869 #define __FUNC__ "MatCreateMPIDense"
870 /*@C
871    MatCreateMPIDense - Creates a parallel matrix in dense format.
872 
873    Collective on MPI_Comm
874 
875    Input Parameters:
876 +  comm - MPI communicator
877 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
878 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
879 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
880 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
881 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
882    to control all matrix memory allocation.
883 
884    Output Parameter:
885 .  A - the matrix
886 
887    Notes:
888    The dense format is fully compatible with standard Fortran 77
889    storage by columns.
890 
891    The data input variable is intended primarily for Fortran programmers
892    who wish to allocate their own matrix memory space.  Most users should
893    set data=PETSC_NULL.
894 
895    The user MUST specify either the local or global matrix dimensions
896    (possibly both).
897 
898    Currently, the only parallel dense matrix decomposition is by rows,
899    so that n=N and each submatrix owns all of the global columns.
900 
901    Level: intermediate
902 
903 .keywords: matrix, dense, parallel
904 
905 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
906 @*/
907 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
908 {
909   Mat          mat;
910   Mat_MPIDense *a;
911   int          ierr, i,flg;
912 
913   PetscFunctionBegin;
914   /* Note:  For now, when data is specified above, this assumes the user correctly
915    allocates the local dense storage space.  We should add error checking. */
916 
917   *A = 0;
918   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
919   PLogObjectCreate(mat);
920   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
921   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
922   mat->ops->destroy = MatDestroy_MPIDense;
923   mat->ops->view    = MatView_MPIDense;
924   mat->factor       = 0;
925   mat->mapping      = 0;
926 
927   a->factor       = 0;
928   mat->insertmode = NOT_SET_VALUES;
929   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
930   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
931 
932   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
933 
934   /*
935      The computation of n below is wrong; n should represent the number of local
936      rows in the right-hand (column) vector.
937   */
938 
939   /* each row stores all columns */
940   if (N == PETSC_DECIDE) N = n;
941   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
942   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
943   a->N = mat->N = N;
944   a->M = mat->M = M;
945   a->m = mat->m = m;
946   a->n = mat->n = n;
947 
948   /* the information in the maps duplicates the information computed below; eventually
949      we should remove the duplicated information that is not contained in the maps */
950   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
951   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
952 
953   /* build local table of row and column ownerships */
954   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
955   a->cowners = a->rowners + a->size + 1;
956   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
957   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
958   a->rowners[0] = 0;
959   for ( i=2; i<=a->size; i++ ) {
960     a->rowners[i] += a->rowners[i-1];
961   }
962   a->rstart = a->rowners[a->rank];
963   a->rend   = a->rowners[a->rank+1];
964   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
965   a->cowners[0] = 0;
966   for ( i=2; i<=a->size; i++ ) {
967     a->cowners[i] += a->cowners[i-1];
968   }
969 
970   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
971   PLogObjectParent(mat,a->A);
972 
973   /* build cache for entries formed off-processor */
974   a->donotstash = 0;
975   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
976 
977   /* stuff used for matrix vector multiply */
978   a->lvec        = 0;
979   a->Mvctx       = 0;
980   a->roworiented = 1;
981 
982   *A = mat;
983   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
984   if (flg) {
985     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
986   }
987   PetscFunctionReturn(0);
988 }
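
/*
   Minimal usage sketch for MatCreateMPIDense (an illustration only, not part of
   the library's test suite; it assumes PetscInitialize() has been called and that
   comm is a valid MPI communicator such as PETSC_COMM_WORLD):

      Mat    A;
      int    ierr, i, rstart, rend;
      Scalar one = 1.0;

      ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/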
989 
990 #undef __FUNC__
991 #define __FUNC__ "MatDuplicate_MPIDense"
992 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
993 {
994   Mat          mat;
995   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
996   int          ierr;
997   FactorCtx    *factor;
998 
999   PetscFunctionBegin;
1000   *newmat       = 0;
1001   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1002   PLogObjectCreate(mat);
1003   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1004   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1005   mat->ops->destroy = MatDestroy_MPIDense;
1006   mat->ops->view    = MatView_MPIDense;
1007   mat->factor       = A->factor;
1008   mat->assembled    = PETSC_TRUE;
1009 
1010   a->m = mat->m = oldmat->m;
1011   a->n = mat->n = oldmat->n;
1012   a->M = mat->M = oldmat->M;
1013   a->N = mat->N = oldmat->N;
1014   if (oldmat->factor) {
1015     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1016     /* copy factor contents ... add this code! */
1017   } else a->factor = 0;
1018 
1019   a->rstart       = oldmat->rstart;
1020   a->rend         = oldmat->rend;
1021   a->size         = oldmat->size;
1022   a->rank         = oldmat->rank;
1023   mat->insertmode = NOT_SET_VALUES;
1024   a->donotstash   = oldmat->donotstash;
1025   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1026   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1027   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1028   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1029 
1030   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1031   PLogObjectParent(mat,a->lvec);
1032   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1033   PLogObjectParent(mat,a->Mvctx);
1034   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1035   PLogObjectParent(mat,a->A);
1036   *newmat = mat;
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 #include "sys.h"
1041 
1042 #undef __FUNC__
1043 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1044 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1045 {
1046   int        *rowners, i,size,rank,m,ierr,nz,j;
1047   Scalar     *array,*vals,*vals_ptr;
1048   MPI_Status status;
1049 
1050   PetscFunctionBegin;
1051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1053 
1054   /* determine ownership of all rows */
1055   m          = M/size + ((M % size) > rank);
1056   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1057   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1058   rowners[0] = 0;
1059   for ( i=2; i<=size; i++ ) {
1060     rowners[i] += rowners[i-1];
1061   }
1062 
1063   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1064   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1065 
1066   if (!rank) {
1067     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1068 
1069     /* read in my part of the matrix numerical values  */
1070     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1071 
1072     /* insert into the matrix by row (this is why we cannot read directly into the column-major array) */
1073     vals_ptr = vals;
1074     for ( i=0; i<m; i++ ) {
1075       for ( j=0; j<N; j++ ) {
1076         array[i + j*m] = *vals_ptr++;
1077       }
1078     }
1079 
1080     /* read in the other processors' parts and ship them out */
1081     for ( i=1; i<size; i++ ) {
1082       nz   = (rowners[i+1] - rowners[i])*N;
1083       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1084       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1085     }
1086   } else {
1087     /* receive numeric values */
1088     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1089 
1090     /* receive message of values*/
1091     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1092 
1093     /* insert into the matrix by row (this is why we cannot read directly into the column-major array) */
1094     vals_ptr = vals;
1095     for ( i=0; i<m; i++ ) {
1096       for ( j=0; j<N; j++ ) {
1097         array[i + j*m] = *vals_ptr++;
1098       }
1099     }
1100   }
1101   PetscFree(rowners);
1102   PetscFree(vals);
1103   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1104   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ "MatLoad_MPIDense"
1111 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1112 {
1113   Mat          A;
1114   Scalar       *vals,*svals;
1115   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1116   MPI_Status   status;
1117   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1118   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1119   int          tag = ((PetscObject)viewer)->tag;
1120   int          i, nz, ierr, j,rstart, rend, fd;
1121 
1122   PetscFunctionBegin;
1123   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1125   if (!rank) {
1126     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1127     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1128     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1129   }
1130 
1131   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1132   M = header[1]; N = header[2]; nz = header[3];
1133 
1134   /*
1135        Handle case where matrix is stored on disk as a dense matrix
1136   */
1137   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1138     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1139     PetscFunctionReturn(0);
1140   }
1141 
1142   /* determine ownership of all rows */
1143   m          = M/size + ((M % size) > rank);
1144   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1145   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1146   rowners[0] = 0;
1147   for ( i=2; i<=size; i++ ) {
1148     rowners[i] += rowners[i-1];
1149   }
1150   rstart = rowners[rank];
1151   rend   = rowners[rank+1];
1152 
1153   /* distribute row lengths to all processors */
1154   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1155   offlens = ourlens + (rend-rstart);
1156   if (!rank) {
1157     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1158     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1159     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1160     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1161     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1162     PetscFree(sndcounts);
1163   } else {
1164     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1165   }
1166 
1167   if (!rank) {
1168     /* calculate the number of nonzeros on each processor */
1169     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1170     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1171     for ( i=0; i<size; i++ ) {
1172       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1173         procsnz[i] += rowlengths[j];
1174       }
1175     }
1176     PetscFree(rowlengths);
1177 
1178     /* determine max buffer needed and allocate it */
1179     maxnz = 0;
1180     for ( i=0; i<size; i++ ) {
1181       maxnz = PetscMax(maxnz,procsnz[i]);
1182     }
1183     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1184 
1185     /* read in my part of the matrix column indices  */
1186     nz = procsnz[0];
1187     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1188     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1189 
1190     /* read in everyone else's part and ship it off */
1191     for ( i=1; i<size; i++ ) {
1192       nz   = procsnz[i];
1193       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1194       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1195     }
1196     PetscFree(cols);
1197   } else {
1198     /* determine buffer space needed for message */
1199     nz = 0;
1200     for ( i=0; i<m; i++ ) {
1201       nz += ourlens[i];
1202     }
1203     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1204 
1205     /* receive message of column indices*/
1206     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1207     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1208     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1209   }
1210 
1211   /* loop over local rows, determining number of off diagonal entries */
1212   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1213   jj = 0;
1214   for ( i=0; i<m; i++ ) {
1215     for ( j=0; j<ourlens[i]; j++ ) {
1216       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1217       jj++;
1218     }
1219   }
1220 
1221   /* create our matrix */
1222   for ( i=0; i<m; i++ ) {
1223     ourlens[i] -= offlens[i];
1224   }
1225   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1226   A = *newmat;
1227   for ( i=0; i<m; i++ ) {
1228     ourlens[i] += offlens[i];
1229   }
1230 
1231   if (!rank) {
1232     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1233 
1234     /* read in my part of the matrix numerical values  */
1235     nz = procsnz[0];
1236     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1237 
1238     /* insert into matrix */
1239     jj      = rstart;
1240     smycols = mycols;
1241     svals   = vals;
1242     for ( i=0; i<m; i++ ) {
1243       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1244       smycols += ourlens[i];
1245       svals   += ourlens[i];
1246       jj++;
1247     }
1248 
1249     /* read in the other processors' parts and ship them out */
1250     for ( i=1; i<size; i++ ) {
1251       nz   = procsnz[i];
1252       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1253       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1254     }
1255     PetscFree(procsnz);
1256   } else {
1257     /* receive numeric values */
1258     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1259 
1260     /* receive message of values*/
1261     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1262     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1263     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1264 
1265     /* insert into matrix */
1266     jj      = rstart;
1267     smycols = mycols;
1268     svals   = vals;
1269     for ( i=0; i<m; i++ ) {
1270       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1271       smycols += ourlens[i];
1272       svals   += ourlens[i];
1273       jj++;
1274     }
1275   }
1276   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1277 
1278   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1279   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 
1284 
1285 
1286 
1287