xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 537820f0162acfd8680b9b7279e7ec2ac37be709)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.45 1996/08/08 14:42:42 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
13                                int *idxn,Scalar *v,InsertMode addv)
14 {
15   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
16   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
17   int          roworiented = A->roworiented;
18 
19   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
20     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
21   }
22   A->insertmode = addv;
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
25     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
28       if (roworiented) {
29         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
30       }
31       else {
32         for ( j=0; j<n; j++ ) {
33           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
34           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
35           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
36         }
37       }
38     }
39     else {
40       if (roworiented) {
41         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
42       }
43       else { /* must stash each separately */
44         row = idxm[i];
45         for ( j=0; j<n; j++ ) {
46           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
47                  CHKERRQ(ierr);
48         }
49       }
50     }
51   }
52   return 0;
53 }
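
/*
   A minimal sketch (kept under #if 0 so it does not affect the build) of the
   two input orderings accepted by the routine above.  By default the block of
   values is row oriented, i.e. v[i*n+j] belongs to (idxm[i],idxn[j]); after
   MatSetOption(mat,MAT_COLUMN_ORIENTED) the same block is read by columns,
   v[i+j*m].  The helper name ExampleSetBlock_MPIDense is illustrative only.
*/
#if 0
static int ExampleSetBlock_MPIDense(Mat mat)
{
  int    ierr, rows[2] = {0,1}, cols[2] = {0,1};
  /* the same 2x2 block [1 2; 3 4] in both layouts */
  Scalar vrow[4] = {1.0,2.0,3.0,4.0};  /* row oriented:    a00,a01,a10,a11 */
  Scalar vcol[4] = {1.0,3.0,2.0,4.0};  /* column oriented: a00,a10,a01,a11 */

  ierr = MatSetValues(mat,2,rows,2,cols,vrow,INSERT_VALUES); CHKERRQ(ierr);

  ierr = MatSetOption(mat,MAT_COLUMN_ORIENTED); CHKERRQ(ierr);
  ierr = MatSetValues(mat,2,rows,2,cols,vcol,INSERT_VALUES); CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
#endif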
54 
55 static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
56 {
57   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
58   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
59 
60   for ( i=0; i<m; i++ ) {
61     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
62     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
63     if (idxm[i] >= rstart && idxm[i] < rend) {
64       row = idxm[i] - rstart;
65       for ( j=0; j<n; j++ ) {
66         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
67         if (idxn[j] >= mdn->N)
68           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
69         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
70       }
71     }
72     else {
73       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
74     }
75   }
76   return 0;
77 }
78 
79 static int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int ierr;
83 
84   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85   return 0;
86 }
87 
88 static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
89 {
90   return 0;
91 }
92 
93 static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
94 {
95   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
96   MPI_Comm     comm = mat->comm;
97   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
98   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
99   int          tag = mat->tag, *owner,*starts,count,ierr;
100   InsertMode   addv;
101   MPI_Request  *send_waits,*recv_waits;
102   Scalar       *rvalues,*svalues;
103 
104   /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES) */
105   MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
106   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
107     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
108     }
109   mdn->insertmode = addv; /* in case this processor had no cache */
110 
111   /*  first count number of contributors to each processor */
112   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
113   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
114   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
115   for ( i=0; i<mdn->stash.n; i++ ) {
116     idx = mdn->stash.idx[i];
117     for ( j=0; j<size; j++ ) {
118       if (idx >= owners[j] && idx < owners[j+1]) {
119         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
120       }
121     }
122   }
123   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
124 
125   /* inform other processors of number of messages and max length*/
126   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
127   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
128   nreceives = work[rank];
129   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
130   nmax = work[rank];
131   PetscFree(work);
132 
133   /* post receives:
134        1) each message will consist of ordered triples
135      (global row index,global column index,value); the indices are stored
136      as Scalars to simplify the message passing.
137        2) since we don't know how long each individual message is we
138      allocate the largest needed buffer for each receive. Potentially
139      this is a lot of wasted space.
140 
141        This could be done better.
142   */
143   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
144   CHKPTRQ(rvalues);
145   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
146   CHKPTRQ(recv_waits);
147   for ( i=0; i<nreceives; i++ ) {
148     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
149               comm,recv_waits+i);
150   }
151 
152   /* do sends:
153       1) starts[i] gives the starting index in svalues for stuff going to
154          the ith processor
155   */
156   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
157   CHKPTRQ(svalues);
158   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
159   CHKPTRQ(send_waits);
160   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
161   starts[0] = 0;
162   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
163   for ( i=0; i<mdn->stash.n; i++ ) {
164     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
165     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
166     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
167   }
168   PetscFree(owner);
169   starts[0] = 0;
170   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
171   count = 0;
172   for ( i=0; i<size; i++ ) {
173     if (procs[i]) {
174       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
175                 comm,send_waits+count++);
176     }
177   }
178   PetscFree(starts); PetscFree(nprocs);
179 
180   /* Free cache space */
181   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
182 
183   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
184   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
185   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
186   mdn->rmax       = nmax;
187 
188   return 0;
189 }
190 extern int MatSetUpMultiply_MPIDense(Mat);
191 
192 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
193 {
194   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
195   MPI_Status   *send_status,recv_status;
196   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
197   Scalar       *values,val;
198   InsertMode   addv = mdn->insertmode;
199 
200   /*  wait on receives */
201   while (count) {
202     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
203     /* unpack receives into our local space */
204     values = mdn->rvalues + 3*imdex*mdn->rmax;
205     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
206     n = n/3;
207     for ( i=0; i<n; i++ ) {
208       row = (int) PetscReal(values[3*i]) - mdn->rstart;
209       col = (int) PetscReal(values[3*i+1]);
210       val = values[3*i+2];
211       if (col >= 0 && col < mdn->N) {
212         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
213       }
214       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
215     }
216     count--;
217   }
218   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
219 
220   /* wait on sends */
221   if (mdn->nsends) {
222     send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
223     CHKPTRQ(send_status);
224     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
225     PetscFree(send_status);
226   }
227   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
228 
229   mdn->insertmode = NOT_SET_VALUES;
230   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
235   }
236   return 0;
237 }
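
/*
   Caller-side sketch of the stash/assembly protocol implemented by
   MatSetValues_MPIDense() and the assembly routines above (illustrative only;
   ExampleOffProcInsert is not a PETSc routine).  Values set in rows owned by
   another process are cached in the stash and shipped to their owners by the
   collective MatAssemblyBegin()/MatAssemblyEnd() pair.
*/
#if 0
static int ExampleOffProcInsert(Mat mat)
{
  int    ierr, rstart, rend, row = 0, col = 0;
  Scalar one = 1.0;

  ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
  if (rstart > 0) row = rstart - 1;  /* a row owned by the previous process */
  ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);

  /* collective: every process must call both routines */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
#endif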
238 
239 static int MatZeroEntries_MPIDense(Mat A)
240 {
241   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
242   return MatZeroEntries(l->A);
243 }
244 
245 /* the code does not set the diagonal entries correctly unless the
246    matrix is square and the column and row ownerships are identical.
247    This is a BUG. The only way to fix it seems to be to access
248    the local matrix (l->A) directly rather than going through the
249    MatZeroRows() routine.
250 */
251 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
252 {
253   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
254   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
255   int            *procs,*nprocs,j,found,idx,nsends,*work;
256   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
257   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
258   int            *lens,imdex,*lrows,*values;
259   MPI_Comm       comm = A->comm;
260   MPI_Request    *send_waits,*recv_waits;
261   MPI_Status     recv_status,*send_status;
262   IS             istmp;
263 
264   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
265   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
266 
267   /*  first count number of contributors to each processor */
268   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
269   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
270   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
271   for ( i=0; i<N; i++ ) {
272     idx = rows[i];
273     found = 0;
274     for ( j=0; j<size; j++ ) {
275       if (idx >= owners[j] && idx < owners[j+1]) {
276         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
277       }
278     }
279     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
280   }
281   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
282 
283   /* inform other processors of number of messages and max length*/
284   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
285   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
286   nrecvs = work[rank];
287   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
288   nmax = work[rank];
289   PetscFree(work);
290 
291   /* post receives:   */
292   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
293   CHKPTRQ(rvalues);
294   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
295   CHKPTRQ(recv_waits);
296   for ( i=0; i<nrecvs; i++ ) {
297     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
298   }
299 
300   /* do sends:
301       1) starts[i] gives the starting index in svalues for stuff going to
302          the ith processor
303   */
304   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
305   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
306   CHKPTRQ(send_waits);
307   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
308   starts[0] = 0;
309   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
310   for ( i=0; i<N; i++ ) {
311     svalues[starts[owner[i]]++] = rows[i];
312   }
313   ISRestoreIndices(is,&rows);
314 
315   starts[0] = 0;
316   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
317   count = 0;
318   for ( i=0; i<size; i++ ) {
319     if (procs[i]) {
320       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
321     }
322   }
323   PetscFree(starts);
324 
325   base = owners[rank];
326 
327   /*  wait on receives */
328   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
329   source = lens + nrecvs;
330   count  = nrecvs; slen = 0;
331   while (count) {
332     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
333     /* unpack receives into our local space */
334     MPI_Get_count(&recv_status,MPI_INT,&n);
335     source[imdex]  = recv_status.MPI_SOURCE;
336     lens[imdex]  = n;
337     slen += n;
338     count--;
339   }
340   PetscFree(recv_waits);
341 
342   /* move the data into the send scatter */
343   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
344   count = 0;
345   for ( i=0; i<nrecvs; i++ ) {
346     values = rvalues + i*nmax;
347     for ( j=0; j<lens[i]; j++ ) {
348       lrows[count++] = values[j] - base;
349     }
350   }
351   PetscFree(rvalues); PetscFree(lens);
352   PetscFree(owner); PetscFree(nprocs);
353 
354   /* actually zap the local rows */
355   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
356   PLogObjectParent(A,istmp);
357   PetscFree(lrows);
358   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
359   ierr = ISDestroy(istmp); CHKERRQ(ierr);
360 
361   /* wait on sends */
362   if (nsends) {
363     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
364     CHKPTRQ(send_status);
365     MPI_Waitall(nsends,send_waits,send_status);
366     PetscFree(send_status);
367   }
368   PetscFree(send_waits); PetscFree(svalues);
369 
370   return 0;
371 }
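
/*
   Usage sketch for the routine above (illustrative only; ExampleZeroFirstRow
   is not a PETSc routine).  Each process lists the global rows it wants
   zeroed in an index set; rows owned by other processes are forwarded to
   their owners, and diag (if non-null) is placed on the diagonal, subject to
   the limitation described in the BUG note above.
*/
#if 0
static int ExampleZeroFirstRow(Mat mat)
{
  int    ierr, rows[1] = {0};
  Scalar diag = 1.0;
  IS     is;

  ierr = ISCreateGeneral(MPI_COMM_SELF,1,rows,&is); CHKERRQ(ierr);
  ierr = MatZeroRows(mat,is,&diag); CHKERRQ(ierr); /* PETSC_NULL leaves the diagonal zeroed */
  ierr = ISDestroy(is); CHKERRQ(ierr);
  return 0;
}
#endif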
372 
373 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
374 {
375   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
376   int          ierr;
377 
378   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
379   CHKERRQ(ierr);
380   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
381   CHKERRQ(ierr);
382   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
383   return 0;
384 }
385 
386 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
387 {
388   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
389   int          ierr;
390 
391   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
393   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
394   return 0;
395 }
396 
397 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
398 {
399   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
400   int          ierr;
401   Scalar       zero = 0.0;
402 
403   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
404   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
405   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
406   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
407   return 0;
408 }
409 
410 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
411 {
412   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
413   int          ierr;
414 
415   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
416   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
417   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
418   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
419   return 0;
420 }
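
/*
   Sketch of a conforming matrix-vector product with the routines above
   (illustrative only; ExampleMultSketch is not a PETSc routine and the
   standard VecCreateMPI() constructor is assumed).  The input vector must
   have global length N; the output vector's local length must match the
   local row count m of the matrix.
*/
#if 0
static int ExampleMultSketch(MPI_Comm comm,Mat mat)
{
  int    ierr, m, n, M, N;
  Scalar one = 1.0;
  Vec    x, y;

  ierr = MatGetLocalSize(mat,&m,&n); CHKERRQ(ierr);
  ierr = MatGetSize(mat,&M,&N); CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x); CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,m,M,&y); CHKERRQ(ierr);
  ierr = VecSet(&one,x); CHKERRQ(ierr);
  ierr = MatMult(mat,x,y); CHKERRQ(ierr);  /* y = A x */
  ierr = VecDestroy(x); CHKERRQ(ierr);
  ierr = VecDestroy(y); CHKERRQ(ierr);
  return 0;
}
#endif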
421 
422 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
423 {
424   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
425   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
426   int          ierr, len, i, n, m = a->m, radd;
427   Scalar       *x, zero = 0.0;
428 
429   VecSet(&zero,v);
430   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
431   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
432   if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
433   len = PetscMin(aloc->m,aloc->n);
434   radd = a->rstart*m;
435   for ( i=0; i<len; i++ ) {
436     x[i] = aloc->v[radd + i*m + i];
437   }
438   return 0;
439 }
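
/*
   Usage sketch for MatGetDiagonal on an MPIDense matrix (illustrative only;
   ExampleDiagonalSketch is not a PETSc routine and the standard
   VecCreateMPI() constructor is assumed).  The vector must have global
   length M to pass the conformity check above.
*/
#if 0
static int ExampleDiagonalSketch(MPI_Comm comm,Mat mat)
{
  int ierr, m, n, M, N;
  Vec d;

  ierr = MatGetLocalSize(mat,&m,&n); CHKERRQ(ierr);
  ierr = MatGetSize(mat,&M,&N); CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,m,M,&d); CHKERRQ(ierr);
  ierr = MatGetDiagonal(mat,d); CHKERRQ(ierr);
  ierr = VecDestroy(d); CHKERRQ(ierr);
  return 0;
}
#endif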
440 
441 static int MatDestroy_MPIDense(PetscObject obj)
442 {
443   Mat          mat = (Mat) obj;
444   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
445   int          ierr;
446 
447 #if defined(PETSC_LOG)
448   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
449 #endif
450   PetscFree(mdn->rowners);
451   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
452   if (mdn->lvec)   VecDestroy(mdn->lvec);
453   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
454   if (mdn->factor) {
455     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
456     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
457     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
458     PetscFree(mdn->factor);
459   }
460   PetscFree(mdn);
461   PLogObjectDestroy(mat);
462   PetscHeaderDestroy(mat);
463   return 0;
464 }
465 
466 #include "pinclude/pviewer.h"
467 
468 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
469 {
470   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
471   int          ierr;
472   if (mdn->size == 1) {
473     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
474   }
475   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
476   return 0;
477 }
478 
479 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
480 {
481   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
482   int          ierr, format;
483   FILE         *fd;
484   ViewerType   vtype;
485 
486   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
487   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
488   ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
489   if (format == ASCII_FORMAT_INFO_DETAILED) {
490     int nz, nzalloc, mem, rank;
491     MPI_Comm_rank(mat->comm,&rank);
492     ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); CHKERRQ(ierr);
493     PetscSequentialPhaseBegin(mat->comm,1);
494       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
495           rank,mdn->m,nz,nzalloc,mem);
496       fflush(fd);
497     PetscSequentialPhaseEnd(mat->comm,1);
498     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
499     return 0;
500   }
501   else if (format == ASCII_FORMAT_INFO) {
502     return 0;
503   }
504   if (vtype == ASCII_FILE_VIEWER) {
505     PetscSequentialPhaseBegin(mat->comm,1);
506     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
507              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
508     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
509     fflush(fd);
510     PetscSequentialPhaseEnd(mat->comm,1);
511   }
512   else {
513     int size = mdn->size, rank = mdn->rank;
514     if (size == 1) {
515       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
516     }
517     else {
518       /* assemble the entire matrix onto first processor. */
519       Mat          A;
520       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
521       Scalar       *vals;
522       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
523 
524       if (!rank) {
525         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
526       }
527       else {
528         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
529       }
530       PLogObjectParent(mat,A);
531 
532       /* Copy the matrix ... This isn't the most efficient means,
533          but it's quick for now */
534       row = mdn->rstart; m = Amdn->m;
535       for ( i=0; i<m; i++ ) {
536         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
537         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
538         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
539         row++;
540       }
541 
542       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
543       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
544       if (!rank) {
545         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
546       }
547       ierr = MatDestroy(A); CHKERRQ(ierr);
548     }
549   }
550   return 0;
551 }
552 
553 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
554 {
555   Mat          mat = (Mat) obj;
556   int          ierr;
557   ViewerType   vtype;
558 
559   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
560   if (vtype == ASCII_FILE_VIEWER) {
561     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
562   }
563   else if (vtype == ASCII_FILES_VIEWER) {
564     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
565   }
566   else if (vtype == BINARY_FILE_VIEWER) {
567     return MatView_MPIDense_Binary(mat,viewer);
568   }
569   return 0;
570 }
571 
572 static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
573                              int *nzalloc,int *mem)
574 {
575   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
576   Mat          mdn = mat->A;
577   int          ierr, isend[3], irecv[3];
578 
579   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
580   if (flag == MAT_LOCAL) {
581     if (nz)      *nz      = isend[0];
582     if (nzalloc) *nzalloc = isend[1];
583     if (mem)     *mem     = isend[2];
584   } else if (flag == MAT_GLOBAL_MAX) {
585     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
586     if (nz)      *nz      = irecv[0];
587     if (nzalloc) *nzalloc = irecv[1];
588     if (mem)     *mem     = irecv[2];
589   } else if (flag == MAT_GLOBAL_SUM) {
590     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
591     if (nz)      *nz      = irecv[0];
592     if (nzalloc) *nzalloc = irecv[1];
593     if (mem)     *mem     = irecv[2];
594   }
595   return 0;
596 }
597 
598 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
599    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
600    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
601    extern int MatSolve_MPIDense(Mat,Vec,Vec);
602    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
603    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
604    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
605 
606 static int MatSetOption_MPIDense(Mat A,MatOption op)
607 {
608   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
609 
610   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
611       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
612       op == MAT_COLUMNS_SORTED ||
613       op == MAT_ROW_ORIENTED) {
614         MatSetOption(a->A,op);
615   }
616   else if (op == MAT_ROWS_SORTED ||
617            op == MAT_SYMMETRIC ||
618            op == MAT_STRUCTURALLY_SYMMETRIC ||
619            op == MAT_YES_NEW_DIAGONALS)
620     PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
621   else if (op == MAT_COLUMN_ORIENTED)
622     { a->roworiented = 0; MatSetOption(a->A,op);}
623   else if (op == MAT_NO_NEW_DIAGONALS)
624     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");}
625   else
626     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
627   return 0;
628 }
629 
630 static int MatGetSize_MPIDense(Mat A,int *m,int *n)
631 {
632   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
633   *m = mat->M; *n = mat->N;
634   return 0;
635 }
636 
637 static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
638 {
639   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
640   *m = mat->m; *n = mat->N;
641   return 0;
642 }
643 
644 static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
645 {
646   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
647   *m = mat->rstart; *n = mat->rend;
648   return 0;
649 }
650 
651 static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
652 {
653   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
654   int          lrow, rstart = mat->rstart, rend = mat->rend;
655 
656   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:Only local rows supported");
657   lrow = row - rstart;
658   return MatGetRow(mat->A,lrow,nz,idx,v);
659 }
660 
661 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
662 {
663   if (idx) PetscFree(*idx);
664   if (v) PetscFree(*v);
665   return 0;
666 }
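
/*
   Sketch of walking the locally owned rows with the two routines above
   (illustrative only; ExampleRowSweep is not a PETSc routine).  MatGetRow()
   only serves rows inside [rstart,rend), so the loop is bounded by the
   ownership range; the same pattern is used in MatView_MPIDense_ASCII()
   above.
*/
#if 0
static int ExampleRowSweep(Mat mat)
{
  int    ierr, i, rstart, rend, nz, *cols;
  Scalar *vals;

  ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    ierr = MatGetRow(mat,i,&nz,&cols,&vals); CHKERRQ(ierr);
    /* ... use the nz column indices and values of global row i ... */
    ierr = MatRestoreRow(mat,i,&nz,&cols,&vals); CHKERRQ(ierr);
  }
  return 0;
}
#endif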
667 
668 static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
669 {
670   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
671   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
672   int          ierr, i, j;
673   double       sum = 0.0;
674   Scalar       *v = mat->v;
675 
676   if (mdn->size == 1) {
677     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
678   } else {
679     if (type == NORM_FROBENIUS) {
680       for (i=0; i<mat->n*mat->m; i++ ) {
681 #if defined(PETSC_COMPLEX)
682         sum += real(conj(*v)*(*v)); v++;
683 #else
684         sum += (*v)*(*v); v++;
685 #endif
686       }
687       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
688       *norm = sqrt(*norm);
689       PLogFlops(2*mat->n*mat->m);
690     }
691     else if (type == NORM_1) {
692       double *tmp, *tmp2;
693       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
694       tmp2 = tmp + mdn->N;
695       PetscMemzero(tmp,2*mdn->N*sizeof(double));
696       *norm = 0.0;
697       v = mat->v;
698       for ( j=0; j<mat->n; j++ ) {
699         for ( i=0; i<mat->m; i++ ) {
700           tmp[j] += PetscAbsScalar(*v);  v++;
701         }
702       }
703       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
704       for ( j=0; j<mdn->N; j++ ) {
705         if (tmp2[j] > *norm) *norm = tmp2[j];
706       }
707       PetscFree(tmp);
708       PLogFlops(mat->n*mat->m);
709     }
710     else if (type == NORM_INFINITY) { /* max row norm */
711       double ntemp;
712       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
713       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
714     }
715     else {
716       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
717     }
718   }
719   return 0;
720 }
721 
722 static int MatTranspose_MPIDense(Mat A,Mat *matout)
723 {
724   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
725   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
726   Mat          B;
727   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
728   int          j, i, ierr;
729   Scalar       *v;
730 
731   if (matout == PETSC_NULL && M != N)
732     SETERRQ(1,"MatTranspose_MPIDense:In-place transpose requires a square matrix");
733   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
734          CHKERRQ(ierr);
735 
736   m = Aloc->m; n = Aloc->n; v = Aloc->v;
737   rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
738   for ( j=0; j<n; j++ ) {
739     for (i=0; i<m; i++) rwork[i] = rstart + i;
740     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
741     v += m;
742   }
743   PetscFree(rwork);
744   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
745   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
746   if (matout != PETSC_NULL) {
747     *matout = B;
748   } else {
749     /* This isn't really an in-place transpose; we free a's data structures and overwrite A with B */
750     PetscFree(a->rowners);
751     ierr = MatDestroy(a->A); CHKERRQ(ierr);
752     if (a->lvec) VecDestroy(a->lvec);
753     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
754     PetscFree(a);
755     PetscMemcpy(A,B,sizeof(struct _Mat));
756     PetscHeaderDestroy(B);
757   }
758   return 0;
759 }
760 
761 #include "pinclude/plapack.h"
762 static int MatScale_MPIDense(Scalar *alpha,Mat inA)
763 {
764   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
765   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
766   int          one = 1, nz;
767 
768   nz = a->m*a->n;
769   BLscal_( &nz, alpha, a->v, &one );
770   PLogFlops(nz);
771   return 0;
772 }
773 
774 static int MatConvertSameType_MPIDense(Mat,Mat *,int);
775 
776 /* -------------------------------------------------------------------*/
777 static struct _MatOps MatOps = {MatSetValues_MPIDense,
778        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
779        MatMult_MPIDense,MatMultAdd_MPIDense,
780        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
781 /*       MatSolve_MPIDense,0, */
782        0,0,
783        0,0,
784        0,0,
785 /*       MatLUFactor_MPIDense,0, */
786        0,MatTranspose_MPIDense,
787        MatGetInfo_MPIDense,0,
788        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
789        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
790        0,
791        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
792        0,0,0,
793 /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
794        0,0,
795        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
796        MatGetOwnershipRange_MPIDense,
797        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
798        0,MatConvertSameType_MPIDense,
799        0,0,0,0,0,
800        0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense};
801 
802 /*@C
803    MatCreateMPIDense - Creates a parallel matrix in dense format.
804 
805    Input Parameters:
806 .  comm - MPI communicator
807 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
808 .  n - number of local columns (or PETSC_DECIDE to have it calculated
809            if N is given)
810 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
811 .  N - number of global columns (or PETSC_DECIDE to have it calculated
812            if n is given)
813 .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
814    to control all matrix memory allocation.
815 
816    Output Parameter:
817 .  A - the matrix
818 
819    Notes:
820    The dense format is fully compatible with standard Fortran 77
821    storage by columns.
822 
823    The data input variable is intended primarily for Fortran programmers
824    who wish to allocate their own matrix memory space.  Most users should
825    set data=PETSC_NULL.
826 
827    The user MUST specify either the local or global matrix dimensions
828    (possibly both).
829 
830    Currently, the only parallel dense matrix decomposition is by rows,
831    so that n=N and each submatrix owns all of the global columns.
832 
833 .keywords: matrix, dense, parallel
834 
835 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
836 @*/
837 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
838 {
839   Mat          mat;
840   Mat_MPIDense *a;
841   int          ierr, i,flg;
842 
843   /* Note:  For now, when data is specified above, this assumes the user correctly
844    allocates the local dense storage space.  We should add error checking. */
845 
846   *A = 0;
847   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
848   PLogObjectCreate(mat);
849   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
850   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
851   mat->destroy    = MatDestroy_MPIDense;
852   mat->view       = MatView_MPIDense;
853   mat->factor     = 0;
854 
855   a->factor = 0;
856   a->insertmode = NOT_SET_VALUES;
857   MPI_Comm_rank(comm,&a->rank);
858   MPI_Comm_size(comm,&a->size);
859 
860   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
861   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
862 
863   /* each process stores all columns of its local rows */
864   if (N == PETSC_DECIDE) N = n;
865   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
866   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
867   a->N = mat->N = N;
868   a->M = mat->M = M;
869   a->m = mat->m = m;
870   a->n = mat->n = n;
871 
872   /* build local table of row and column ownerships */
873   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
874   a->cowners = a->rowners + a->size + 1;
875   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
876                        sizeof(Mat_MPIDense));
877   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
878   a->rowners[0] = 0;
879   for ( i=2; i<=a->size; i++ ) {
880     a->rowners[i] += a->rowners[i-1];
881   }
882   a->rstart = a->rowners[a->rank];
883   a->rend   = a->rowners[a->rank+1];
884   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
885   a->cowners[0] = 0;
886   for ( i=2; i<=a->size; i++ ) {
887     a->cowners[i] += a->cowners[i-1];
888   }
889 
890   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
891   PLogObjectParent(mat,a->A);
892 
893   /* build cache (stash) for off-processor entries set with MatSetValues() */
894   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
895 
896   /* stuff used for matrix vector multiply */
897   a->lvec        = 0;
898   a->Mvctx       = 0;
899   a->roworiented = 1;
900 
901   *A = mat;
902   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
903   if (flg) {
904     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
905   }
906   return 0;
907 }
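
/*
   Minimal creation sketch for the constructor above (illustrative only;
   ExampleCreateSketch is not a PETSc routine).  Either the local or the
   global dimensions may be given; here the global sizes are fixed and the
   row distribution is left to PETSc.
*/
#if 0
static int ExampleCreateSketch(MPI_Comm comm)
{
  Mat    A;
  int    ierr, i, j, rstart, rend, M = 10, N = 6;
  Scalar v;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    for ( j=0; j<N; j++ ) {
      v    = (Scalar) (i + j);
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatDestroy(A); CHKERRQ(ierr);
  return 0;
}
#endif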
908 
909 static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
910 {
911   Mat          mat;
912   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
913   int          ierr;
914   FactorCtx    *factor;
915 
916   *newmat       = 0;
917   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
918   PLogObjectCreate(mat);
919   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
920   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
921   mat->destroy  = MatDestroy_MPIDense;
922   mat->view     = MatView_MPIDense;
923   mat->factor   = A->factor;
924   mat->assembled  = PETSC_TRUE;
925 
926   a->m = mat->m = oldmat->m;
927   a->n = mat->n = oldmat->n;
928   a->M = mat->M = oldmat->M;
929   a->N = mat->N = oldmat->N;
930   if (oldmat->factor) {
931     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
932     /* copy factor contents ... add this code! */
933   } else a->factor = 0;
934 
935   a->rstart     = oldmat->rstart;
936   a->rend       = oldmat->rend;
937   a->size       = oldmat->size;
938   a->rank       = oldmat->rank;
939   a->insertmode = NOT_SET_VALUES;
940 
941   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
942   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
943   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
944   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
945 
946   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
947   PLogObjectParent(mat,a->lvec);
948   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
949   PLogObjectParent(mat,a->Mvctx);
950   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
951   PLogObjectParent(mat,a->A);
952   *newmat = mat;
953   return 0;
954 }
955 
956 #include "sys.h"
957 
958 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
959 {
960   int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
961   Scalar     *array,*vals,*vals_ptr;
962   MPI_Status status;
963 
964   MPI_Comm_rank(comm,&rank);
965   MPI_Comm_size(comm,&size);
966 
967   /* determine ownership of all rows */
968   m = M/size + ((M % size) > rank);
969   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
970   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
971   rowners[0] = 0;
972   for ( i=2; i<=size; i++ ) {
973     rowners[i] += rowners[i-1];
974   }
975   rstart = rowners[rank];
976   rend   = rowners[rank+1];
977 
978   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
979   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
980 
981   if (!rank) {
982     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
983 
984     /* read in my part of the matrix numerical values  */
985     ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);
986 
987     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
988     vals_ptr = vals;
989     for ( i=0; i<m; i++ ) {
990       for ( j=0; j<N; j++ ) {
991         array[i + j*m] = *vals_ptr++;
992       }
993     }
994 
995     /* read in other processors and ship out */
996     for ( i=1; i<size; i++ ) {
997       nz   = (rowners[i+1] - rowners[i])*N;
998       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
999       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1000     }
1001   }
1002   else {
1003     /* receive numeric values */
1004     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1005 
1006     /* receive message of values*/
1007     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1008 
1009     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1010     vals_ptr = vals;
1011     for ( i=0; i<m; i++ ) {
1012       for ( j=0; j<N; j++ ) {
1013         array[i + j*m] = *vals_ptr++;
1014       }
1015     }
1016   }
1017   PetscFree(rowners);
1018   PetscFree(vals);
1019   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1020   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1021   return 0;
1022 }
1023 
1024 
1025 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1026 {
1027   Mat          A;
1028   int          i, nz, ierr, j,rstart, rend, fd;
1029   Scalar       *vals,*svals;
1030   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1031   MPI_Status   status;
1032   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1033   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1034   int          tag = ((PetscObject)viewer)->tag;
1035 
1036   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1037   if (!rank) {
1038     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1039     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1040     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:Not a matrix object");
1041   }
1042 
1043   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1044   M = header[1]; N = header[2]; nz = header[3];
1045 
1046   /*
1047        Handle case where matrix is stored on disk as a dense matrix
1048   */
1049   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1050     return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1051   }
1052 
1053   /* determine ownership of all rows */
1054   m = M/size + ((M % size) > rank);
1055   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1056   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1057   rowners[0] = 0;
1058   for ( i=2; i<=size; i++ ) {
1059     rowners[i] += rowners[i-1];
1060   }
1061   rstart = rowners[rank];
1062   rend   = rowners[rank+1];
1063 
1064   /* distribute row lengths to all processors */
1065   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1066   offlens = ourlens + (rend-rstart);
1067   if (!rank) {
1068     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1069     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1070     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1071     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1072     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1073     PetscFree(sndcounts);
1074   }
1075   else {
1076     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1077   }
1078 
1079   if (!rank) {
1080     /* calculate the number of nonzeros on each processor */
1081     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1082     PetscMemzero(procsnz,size*sizeof(int));
1083     for ( i=0; i<size; i++ ) {
1084       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1085         procsnz[i] += rowlengths[j];
1086       }
1087     }
1088     PetscFree(rowlengths);
1089 
1090     /* determine max buffer needed and allocate it */
1091     maxnz = 0;
1092     for ( i=0; i<size; i++ ) {
1093       maxnz = PetscMax(maxnz,procsnz[i]);
1094     }
1095     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1096 
1097     /* read in my part of the matrix column indices  */
1098     nz = procsnz[0];
1099     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1100     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1101 
1102     /* read in everyone else's and ship off */
1103     for ( i=1; i<size; i++ ) {
1104       nz = procsnz[i];
1105       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1106       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1107     }
1108     PetscFree(cols);
1109   }
1110   else {
1111     /* determine buffer space needed for message */
1112     nz = 0;
1113     for ( i=0; i<m; i++ ) {
1114       nz += ourlens[i];
1115     }
1116     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1117 
1118     /* receive message of column indices*/
1119     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1120     MPI_Get_count(&status,MPI_INT,&maxnz);
1121     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with the file");
1122   }
1123 
1124   /* loop over local rows, determining number of off diagonal entries */
1125   PetscMemzero(offlens,m*sizeof(int));
1126   jj = 0;
1127   for ( i=0; i<m; i++ ) {
1128     for ( j=0; j<ourlens[i]; j++ ) {
1129       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1130       jj++;
1131     }
1132   }
1133 
1134   /* create our matrix */
1135   for ( i=0; i<m; i++ ) {
1136     ourlens[i] -= offlens[i];
1137   }
1138   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1139   A = *newmat;
1140   for ( i=0; i<m; i++ ) {
1141     ourlens[i] += offlens[i];
1142   }
1143 
1144   if (!rank) {
1145     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1146 
1147     /* read in my part of the matrix numerical values  */
1148     nz = procsnz[0];
1149     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1150 
1151     /* insert into matrix */
1152     jj      = rstart;
1153     smycols = mycols;
1154     svals   = vals;
1155     for ( i=0; i<m; i++ ) {
1156       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1157       smycols += ourlens[i];
1158       svals   += ourlens[i];
1159       jj++;
1160     }
1161 
1162     /* read in other processors and ship out */
1163     for ( i=1; i<size; i++ ) {
1164       nz = procsnz[i];
1165       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1166       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1167     }
1168     PetscFree(procsnz);
1169   }
1170   else {
1171     /* receive numeric values */
1172     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1173 
1174     /* receive message of values*/
1175     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1176     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1177     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with the file");
1178 
1179     /* insert into matrix */
1180     jj      = rstart;
1181     smycols = mycols;
1182     svals   = vals;
1183     for ( i=0; i<m; i++ ) {
1184       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1185       smycols += ourlens[i];
1186       svals   += ourlens[i];
1187       jj++;
1188     }
1189   }
1190   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1191 
1192   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1193   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1194   return 0;
1195 }
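
/*
   Usage sketch for the loader above (illustrative only; ExampleLoadSketch is
   not a PETSc routine, and it assumes `viewer` is an already opened binary
   viewer).  Requesting the MATMPIDENSE type from MatLoad() selects this
   loader.
*/
#if 0
static int ExampleLoadSketch(Viewer viewer)
{
  Mat    A;
  int    ierr;
  double nrm;

  ierr = MatLoad(viewer,MATMPIDENSE,&A); CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);  /* quick sanity check */
  ierr = MatDestroy(A); CHKERRQ(ierr);
  return 0;
}
#endif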
1196 