xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision d9f96c7ca71afc96fd7309d95e12b2b98fa1305e)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.3 1995/10/22 04:19:52 bsmith Exp curfman $";
3 #endif
4 
5 #include "mpidense.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
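/*
   MatSetValues_MPIDense: values destined for locally owned rows are inserted
   directly into the local sequential dense block (mdn->A); values for rows
   owned by other processes are cached in the stash and communicated later,
   during assembly.
*/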
9 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
10                                int *idxn,Scalar *v,InsertMode addv)
11 {
12   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
13   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
14 
15   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
16     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
17   }
18   mdn->insertmode = addv;
19   for ( i=0; i<m; i++ ) {
20     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
21     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
22     if (idxm[i] >= rstart && idxm[i] < rend) {
23       row = idxm[i] - rstart;
24       for ( j=0; j<n; j++ ) {
25         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
26         if (idxn[j] >= mdn->N)
27           SETERRQ(1,"MatSetValues_MPIDense:Column too large");
28         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
29         CHKERRQ(ierr);
30       }
31     }
32     else {
33       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
34       CHKERRQ(ierr);
35     }
36   }
37   return 0;
38 }
39 
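/*
   Assembly is done in two phases.  MatAssemblyBegin_MPIDense checks (via an
   MPI_Allreduce) that all processes used the same insert mode, counts how many
   stashed (row,column,value) triples must go to each process, posts nonblocking
   receives, and starts nonblocking sends of the packed triples.
   MatAssemblyEnd_MPIDense then completes the communication and inserts the
   received values into the local dense block.
*/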
40 static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
41 {
42   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
43   MPI_Comm     comm = mat->comm;
44   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
45   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
46   int          tag = mat->tag, *owner,*starts,count,ierr;
47   InsertMode   addv;
48   MPI_Request  *send_waits,*recv_waits;
49   Scalar       *rvalues,*svalues;
50 
51   /* make sure all processors are using the same mode: INSERT_VALUES or ADD_VALUES */
52   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
53                 MPI_BOR,comm);
54   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
55     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
56     }
57   mdn->insertmode = addv; /* in case this processor had no cache */
58 
59   /*  first count number of contributors to each processor */
60   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
61   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
62   owner = (int *) PETSCMALLOC( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
63   for ( i=0; i<mdn->stash.n; i++ ) {
64     idx = mdn->stash.idx[i];
65     for ( j=0; j<size; j++ ) {
66       if (idx >= owners[j] && idx < owners[j+1]) {
67         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
68       }
69     }
70   }
71   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
72 
73   /* inform other processors of number of messages and max length*/
74   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
75   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
76   nreceives = work[rank];
77   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
78   nmax = work[rank];
79   PETSCFREE(work);
80 
81   /* post receives:
82        1) each message will consist of ordered triples
83      (global row index, global column index, value); the indices are
84      stored as Scalars to simplify the message passing.
85        2) since we don't know how long each individual message is we
86      allocate the largest needed buffer for each receive. Potentially
87      this is a lot of wasted space.
88 
89        This could be done better.
90   */
91   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
92   CHKPTRQ(rvalues);
93   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
94   CHKPTRQ(recv_waits);
95   for ( i=0; i<nreceives; i++ ) {
96     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
97               comm,recv_waits+i);
98   }
99 
100   /* do sends:
101       1) starts[i] gives the starting index in svalues for stuff going to
102          the ith processor
103   */
104   svalues = (Scalar *) PETSCMALLOC( 3*(mdn->stash.n+1)*sizeof(Scalar) );
105   CHKPTRQ(svalues);
106   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
107   CHKPTRQ(send_waits);
108   starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
109   starts[0] = 0;
110   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
111   for ( i=0; i<mdn->stash.n; i++ ) {
112     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
113     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
114     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
115   }
116   PETSCFREE(owner);
117   starts[0] = 0;
118   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
119   count = 0;
120   for ( i=0; i<size; i++ ) {
121     if (procs[i]) {
122       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
123                 comm,send_waits+count++);
124     }
125   }
126   PETSCFREE(starts); PETSCFREE(nprocs);
127 
128   /* Free cache space */
129   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
130 
131   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
132   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
133   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
134   mdn->rmax       = nmax;
135 
136   return 0;
137 }
138 extern int MatSetUpMultiply_MPIDense(Mat);
139 
140 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
141 {
142   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
143   MPI_Status   *send_status,recv_status;
144   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
145   Scalar       *values,val;
146   InsertMode   addv = mdn->insertmode;
147 
148   /*  wait on receives */
149   while (count) {
150     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
151     /* unpack receives into our local space */
152     values = mdn->rvalues + 3*imdex*mdn->rmax;
153     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
154     n = n/3;
155     for ( i=0; i<n; i++ ) {
156       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
157       col = (int) PETSCREAL(values[3*i+1]);
158       val = values[3*i+2];
159       if (col >= 0 && col < mdn->N) {
160         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
161       }
162       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
163     }
164     count--;
165   }
166   PETSCFREE(mdn->recv_waits); PETSCFREE(mdn->rvalues);
167 
168   /* wait on sends */
169   if (mdn->nsends) {
170     send_status = (MPI_Status *) PETSCMALLOC( mdn->nsends*sizeof(MPI_Status) );
171     CHKPTRQ(send_status);
172     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
173     PETSCFREE(send_status);
174   }
175   PETSCFREE(mdn->send_waits); PETSCFREE(mdn->svalues);
176 
177   mdn->insertmode = NOT_SET_VALUES;
178   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
179   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
180 
181   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
182     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
183   }
184   mdn->assembled = 1;
185   return 0;
186 }
187 
188 static int MatZeroEntries_MPIDense(Mat A)
189 {
190   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
191   return MatZeroEntries(l->A);
192 }
193 
194 /* the code does not do the diagonal entries correctly unless the
195    matrix is square and the column and row ownerships are identical.
196    This is a BUG. The only way to fix it seems to be to access
197    the local dense block (l->A) directly and not through the MatZeroRows()
198    routine.
199 */
200 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
201 {
202   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
203   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
204   int            *procs,*nprocs,j,found,idx,nsends,*work;
205   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
206   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
207   int            *lens,imdex,*lrows,*values;
208   MPI_Comm       comm = A->comm;
209   MPI_Request    *send_waits,*recv_waits;
210   MPI_Status     recv_status,*send_status;
211   IS             istmp;
212 
213   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
214   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
215   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
216 
217   /*  first count number of contributors to each processor */
218   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
219   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
220   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
221   for ( i=0; i<N; i++ ) {
222     idx = rows[i];
223     found = 0;
224     for ( j=0; j<size; j++ ) {
225       if (idx >= owners[j] && idx < owners[j+1]) {
226         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
227       }
228     }
229     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
230   }
231   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
232 
233   /* inform other processors of number of messages and max length*/
234   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
235   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
236   nrecvs = work[rank];
237   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
238   nmax = work[rank];
239   PETSCFREE(work);
240 
241   /* post receives:   */
242   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
243   CHKPTRQ(rvalues);
244   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
245   CHKPTRQ(recv_waits);
246   for ( i=0; i<nrecvs; i++ ) {
247     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
248   }
249 
250   /* do sends:
251       1) starts[i] gives the starting index in svalues for stuff going to
252          the ith processor
253   */
254   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
255   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
256   CHKPTRQ(send_waits);
257   starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
258   starts[0] = 0;
259   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
260   for ( i=0; i<N; i++ ) {
261     svalues[starts[owner[i]]++] = rows[i];
262   }
263   ISRestoreIndices(is,&rows);
264 
265   starts[0] = 0;
266   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
267   count = 0;
268   for ( i=0; i<size; i++ ) {
269     if (procs[i]) {
270       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
271     }
272   }
273   PETSCFREE(starts);
274 
275   base = owners[rank];
276 
277   /*  wait on receives */
278   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
279   source = lens + nrecvs;
280   count  = nrecvs; slen = 0;
281   while (count) {
282     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
283     /* unpack receives into our local space */
284     MPI_Get_count(&recv_status,MPI_INT,&n);
285     source[imdex]  = recv_status.MPI_SOURCE;
286     lens[imdex]  = n;
287     slen += n;
288     count--;
289   }
290   PETSCFREE(recv_waits);
291 
292   /* move the received row indices into a local list */
293   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
294   count = 0;
295   for ( i=0; i<nrecvs; i++ ) {
296     values = rvalues + i*nmax;
297     for ( j=0; j<lens[i]; j++ ) {
298       lrows[count++] = values[j] - base;
299     }
300   }
301   PETSCFREE(rvalues); PETSCFREE(lens);
302   PETSCFREE(owner); PETSCFREE(nprocs);
303 
304   /* actually zap the local rows */
305   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
306   PLogObjectParent(A,istmp);
307   PETSCFREE(lrows);
308   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
309   ierr = ISDestroy(istmp); CHKERRQ(ierr);
310 
311   /* wait on sends */
312   if (nsends) {
313     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
314     CHKPTRQ(send_status);
315     MPI_Waitall(nsends,send_waits,send_status);
316     PETSCFREE(send_status);
317   }
318   PETSCFREE(send_waits); PETSCFREE(svalues);
319 
320   return 0;
321 }
322 
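/*
   Matrix-vector product: each process stores all N columns of its m local rows,
   so the input vector is first gathered into the local work vector lvec via the
   scatter context Mvctx; a purely local dense multiply then produces the locally
   owned part of the result.
*/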
323 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
324 {
325   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
326   int          ierr;
327   if (!mdn->assembled)
328     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
329   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
330   CHKERRQ(ierr);
331   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
332   CHKERRQ(ierr);
333   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
334   return 0;
335 }
336 
337 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
338 {
339   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
340   int          ierr;
341   if (!mdn->assembled)
342     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
343   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
344   CHKERRQ(ierr);
345   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
346   CHKERRQ(ierr);
347   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
348   return 0;
349 }
350 
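/*
   For transpose products the communication runs the other way: the local block
   is applied first (giving a length-N partial result in lvec), and a reverse
   scatter then adds the contributions from all processes into the output vector.
*/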
351 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
354   int          ierr;
355 
356   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIDense:must assemble matrix");
357   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
358   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
359          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
360   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
361          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
362   return 0;
363 }
364 
365 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
366 {
367   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
368   int          ierr;
369 
370   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIDense:must assemble matrix");
371   ierr = MatMultTransAdd(a->A,xx,yy,a->lvec); CHKERRQ(ierr);
372   /* send it on its way */
373   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
374          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
375   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
376          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
377   return 0;
378 }
379 
380 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
381 {
382   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
383   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
384   int          ierr, i, n, m = a->m, radd;
385   Scalar       *x;
386   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIDense:must assemble matrix");
387   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
388   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
389   if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
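  /* the local block is stored by columns, so the diagonal entry of global
     row rstart+i sits at v[i + (rstart+i)*m] in the local array */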
390   radd = a->rstart*m;
391   for ( i=0; i<m; i++ ) {
392     x[i] = aloc->v[radd + i*m + i];
393   }
394   return 0;
395 }
396 
397 static int MatDestroy_MPIDense(PetscObject obj)
398 {
399   Mat          mat = (Mat) obj;
400   Mat_MPIDense *aij = (Mat_MPIDense *) mat->data;
401   int          ierr;
402 #if defined(PETSC_LOG)
403   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
404 #endif
405   PETSCFREE(aij->rowners);
406   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
407   if (aij->lvec)   VecDestroy(aij->lvec);
408   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
409   PETSCFREE(aij);
410   PLogObjectDestroy(mat);
411   PETSCHEADERDESTROY(mat);
412   return 0;
413 }
414 
415 #include "pinclude/pviewer.h"
416 
417 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
418 {
419   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
420   int          ierr;
421   if (mdn->size == 1) {
422     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
423   }
424   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
425   return 0;
426 }
427 
428 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
429 {
430   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
431   int          ierr, format;
432   PetscObject  vobj = (PetscObject) viewer;
433   FILE         *fd;
434 
435   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
436     ierr = ViewerFileGetFormat_Private(viewer,&format);
437     if (format == FILE_FORMAT_INFO) {
438       int nz, nzalloc, mem, rank;
439       MPI_Comm_rank(mat->comm,&rank);
440       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
441       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
442       MPIU_Seq_begin(mat->comm,1);
443         fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",
444             rank,mdn->m,nz,nzalloc,mem);
445       fflush(fd);
446       MPIU_Seq_end(mat->comm,1);
447       ierr = VecScatterView(mdn->Mvctx,viewer);
448       return 0;
449     }
450   }
451 
452   if (vobj->type == ASCII_FILE_VIEWER) {
453     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
454     MPIU_Seq_begin(mat->comm,1);
455     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
456              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
457     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
458     fflush(fd);
459     MPIU_Seq_end(mat->comm,1);
460   }
461   else {
462     int size = mdn->size, rank = mdn->rank;
463     if (size == 1) {
464       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
465     }
466     else {
467       /* assemble the entire matrix onto first processor. */
468       Mat          A;
469       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
470       Scalar       *vals;
471       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
472 
473       if (!rank) {
474         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,&A); CHKERRQ(ierr);
475       }
476       else {
477         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,&A); CHKERRQ(ierr);
478       }
479       PLogObjectParent(mat,A);
480 
481       /* Copy the matrix ... This isn't the most efficient means,
482          but it's quick for now */
483       row = mdn->rstart; m = Amdn->m;
484       for ( i=0; i<m; i++ ) {
485         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
486         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
487         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
488         row++;
489       }
490 
491       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
492       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
493       if (!rank) {
494         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
495       }
496       ierr = MatDestroy(A); CHKERRQ(ierr);
497     }
498   }
499   return 0;
500 }
501 
502 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
503 {
504   Mat          mat = (Mat) obj;
505   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
506   PetscObject  vobj = (PetscObject) viewer;
507   int          ierr;
508 
509   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
510   if (!viewer) {
511     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
512   }
513   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
514     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
515   }
516   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
517     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
518   }
519   else if (vobj->type == BINARY_FILE_VIEWER) {
520     return MatView_MPIDense_Binary(mat,viewer);
521   }
522   return 0;
523 }
524 
525 static int MatGetInfo_MPIDense(Mat matin,MatInfoType flag,int *nz,
526                              int *nzalloc,int *mem)
527 {
528   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
529   Mat          A = mat->A;
530   int          ierr, isend[3], irecv[3];
531 
532   ierr = MatGetInfo(A,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
533   if (flag == MAT_LOCAL) {
534     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
535   } else if (flag == MAT_GLOBAL_MAX) {
536     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
537     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
538   } else if (flag == MAT_GLOBAL_SUM) {
539     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
540     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
541   }
542   return 0;
543 }
544 
545 static int MatSetOption_MPIDense(Mat A,MatOption op)
546 {
547   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
548 
549   if (op == NO_NEW_NONZERO_LOCATIONS ||
550       op == YES_NEW_NONZERO_LOCATIONS ||
551       op == COLUMNS_SORTED ||
552       op == ROW_ORIENTED) {
553         MatSetOption(a->A,op);
554   }
555   else if (op == ROWS_SORTED ||
556            op == SYMMETRIC_MATRIX ||
557            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
558            op == YES_NEW_DIAGONALS)
559     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
560   else if (op == COLUMN_ORIENTED)
561     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
562   else if (op == NO_NEW_DIAGONALS)
563     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
564   else
565     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
566   return 0;
567 }
568 
569 static int MatGetSize_MPIDense(Mat matin,int *m,int *n)
570 {
571   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
572   *m = mat->M; *n = mat->N;
573   return 0;
574 }
575 
576 static int MatGetLocalSize_MPIDense(Mat matin,int *m,int *n)
577 {
578   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
579   *m = mat->m; *n = mat->N;
580   return 0;
581 }
582 
583 static int MatGetOwnershipRange_MPIDense(Mat matin,int *m,int *n)
584 {
585   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
586   *m = mat->rstart; *n = mat->rend;
587   return 0;
588 }
589 
590 static int MatGetRow_MPIDense(Mat matin,int row,int *nz,int **idx,Scalar **v)
591 {
592   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
593   int          lrow, rstart = mat->rstart, rend = mat->rend;
594 
595   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
596   lrow = row - rstart;
597   return MatGetRow(mat->A,lrow,nz,idx,v);
598 }
599 
600 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
601 {
602   if (idx) PETSCFREE(*idx);
603   if (v) PETSCFREE(*v);
604   return 0;
605 }
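/*
   A minimal sketch of traversing the locally owned rows (this assumes the
   matrix has been assembled; error checking is omitted):

      int    rstart, rend, row, nz, *cols;
      Scalar *vals;
      MatGetOwnershipRange(mat,&rstart,&rend);
      for ( row=rstart; row<rend; row++ ) {
        MatGetRow(mat,row,&nz,&cols,&vals);
        ... use the nz entries in cols[] and vals[] ...
        MatRestoreRow(mat,row,&nz,&cols,&vals);
      }
*/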
606 
607 static int MatNorm_MPIDense(Mat mat,MatNormType type,double *norm)
608 {
609   *norm = 0.0;
610   SETERRQ(1,"MatNorm_MPIDense:Not finished");
611   return 0;
612 }
613 
614 static int MatCopyPrivate_MPIDense(Mat,Mat *,int);
615 
616 /* -------------------------------------------------------------------*/
617 static struct _MatOps MatOps = {MatSetValues_MPIDense,
618        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
619        MatMult_MPIDense,MatMultAdd_MPIDense,
620        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
621        0, 0,
622        0, 0, 0,
623        0, 0, 0,
624        MatGetInfo_MPIDense, 0,
625        MatGetDiagonal_MPIDense, 0, MatNorm_MPIDense,
626        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
627        0,
628        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
629        0,
630        0, 0, 0, 0,
631        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
632        MatGetOwnershipRange_MPIDense,
633        0, 0,
634        0, 0, 0, 0, 0, MatCopyPrivate_MPIDense};
635 
636 /*@C
637    MatCreateMPIDense - Creates a parallel matrix in dense format.
638 
639    Input Parameters:
640 .  comm - MPI communicator
641 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
642 .  n - number of local columns (or PETSC_DECIDE to have calculated
643            if N is given)
644 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
645 .  N - number of global columns (or PETSC_DECIDE to have calculated
646            if n is given)
647 
648    Output Parameter:
649 .  newmat - the matrix
650 
651    Notes:
652    The dense format is fully compatible with standard Fortran 77
653    storage by columns.
654 
655    The user MUST specify either the local or global matrix dimensions
656    (possibly both).
657 
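   Example usage (a minimal sketch; declarations, error checking with CHKERRQ(),
   and the code that generates row, col, and value are omitted; n is passed as N
   because the current implementation requires the local and global column
   counts to agree):

      ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,N,M,N,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY);
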
658 .keywords: matrix, dense, parallel
659 
660 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
661 @*/
662 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
663 {
664   Mat          mat;
665   Mat_MPIDense *a;
666   int          ierr, i;
667 
668   *newmat         = 0;
669   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
670   PLogObjectCreate(mat);
671   mat->data       = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
672   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
673   mat->destroy    = MatDestroy_MPIDense;
674   mat->view       = MatView_MPIDense;
675   mat->factor     = 0;
676 
677   a->insertmode = NOT_SET_VALUES;
678   MPI_Comm_rank(comm,&a->rank);
679   MPI_Comm_size(comm,&a->size);
680 
681   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
682   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
683 
684   /* each process stores all N columns of its local rows */
685   if (N == PETSC_DECIDE) N = n;
686   if (n == PETSC_DECIDE) n = N;
687   if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported");
688   a->N = N;
689   a->M = M;
690   a->m = m;
691   a->n = n;
692 
693   /* build local table of row and column ownerships */
694   a->rowners = (int *) PETSCMALLOC((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
695   PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
696                        sizeof(Mat_MPIDense));
697   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
698   a->rowners[0] = 0;
699   for ( i=2; i<=a->size; i++ ) {
700     a->rowners[i] += a->rowners[i-1];
701   }
702   a->rstart = a->rowners[a->rank];
703   a->rend   = a->rowners[a->rank+1];
704 
705   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
706   PLogObjectParent(mat,a->A);
707 
708   /* build cache for entries destined for rows owned by other processes */
709   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
710 
711   /* stuff used for matrix vector multiply */
712   a->lvec      = 0;
713   a->Mvctx     = 0;
714   a->assembled = 0;
715 
716   *newmat = mat;
717   return 0;
718 }
719 
720 static int MatCopyPrivate_MPIDense(Mat matin,Mat *newmat,int cpvalues)
721 {
722   Mat        mat;
723   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) matin->data;
724   int        ierr;
725 
726   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
727   *newmat       = 0;
728   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,matin->comm);
729   PLogObjectCreate(mat);
730   mat->data     = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
731   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
732   mat->destroy  = MatDestroy_MPIDense;
733   mat->view     = MatView_MPIDense;
734   mat->factor   = matin->factor;
735 
736   a->m          = oldmat->m;
737   a->n          = oldmat->n;
738   a->M          = oldmat->M;
739   a->N          = oldmat->N;
740 
741   a->assembled  = 1;
742   a->rstart     = oldmat->rstart;
743   a->rend       = oldmat->rend;
744   a->size       = oldmat->size;
745   a->rank       = oldmat->rank;
746   a->insertmode = NOT_SET_VALUES;
747 
748   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
749   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
750   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
751   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
752 
753   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
754   PLogObjectParent(mat,a->lvec);
755   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
756   PLogObjectParent(mat,a->Mvctx);
757   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
758   PLogObjectParent(mat,a->A);
759   *newmat = mat;
760   return 0;
761 }
762 
763 #include "sysio.h"
764 
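/*
   MatLoad_MPIDense reads a matrix from a PETSc binary file.  Process 0 reads
   the four-integer header (whose first entries are the matrix cookie and the
   global dimensions M and N), the row lengths, the column indices, and the
   numerical values; it keeps its own portion and ships each other process its
   rows with MPI_Send.  Every process then inserts its values with MatSetValues()
   and assembles the result.
*/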
765 int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
766 {
767   Mat          A;
768   int          i, nz, ierr, j,rstart, rend, fd;
769   Scalar       *vals,*svals;
770   PetscObject  vobj = (PetscObject) bview;
771   MPI_Comm     comm = vobj->comm;
772   MPI_Status   status;
773   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
774   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
775   int          tag = ((PetscObject)bview)->tag;
776 
777   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
778   if (!rank) {
779     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
780     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
781     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
782   }
783 
784   MPI_Bcast(header+1,3,MPI_INT,0,comm);
785   M = header[1]; N = header[2];
786   /* determine ownership of all rows */
787   m = M/size + ((M % size) > rank);
788   rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
789   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
790   rowners[0] = 0;
791   for ( i=2; i<=size; i++ ) {
792     rowners[i] += rowners[i-1];
793   }
794   rstart = rowners[rank];
795   rend   = rowners[rank+1];
796 
797   /* distribute row lengths to all processors */
798   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
799   offlens = ourlens + (rend-rstart);
800   if (!rank) {
801     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
802     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
803     sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
804     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
805     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
806     PETSCFREE(sndcounts);
807   }
808   else {
809     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
810   }
811 
812   if (!rank) {
813     /* calculate the number of nonzeros on each processor */
814     procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
815     PetscZero(procsnz,size*sizeof(int));
816     for ( i=0; i<size; i++ ) {
817       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
818         procsnz[i] += rowlengths[j];
819       }
820     }
821     PETSCFREE(rowlengths);
822 
823     /* determine max buffer needed and allocate it */
824     maxnz = 0;
825     for ( i=0; i<size; i++ ) {
826       maxnz = PETSCMAX(maxnz,procsnz[i]);
827     }
828     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
829 
830     /* read in my part of the matrix column indices  */
831     nz = procsnz[0];
832     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
833     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
834 
835     /* read in everyone else's portion and ship it off */
836     for ( i=1; i<size; i++ ) {
837       nz = procsnz[i];
838       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
839       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
840     }
841     PETSCFREE(cols);
842   }
843   else {
844     /* determine buffer space needed for message */
845     nz = 0;
846     for ( i=0; i<m; i++ ) {
847       nz += ourlens[i];
848     }
849     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
850 
851     /* receive message of column indices*/
852     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
853     MPI_Get_count(&status,MPI_INT,&maxnz);
854     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
855   }
856 
857   /* loop over local rows, determining number of off diagonal entries */
858   PetscZero(offlens,m*sizeof(int));
859   jj = 0;
860   for ( i=0; i<m; i++ ) {
861     for ( j=0; j<ourlens[i]; j++ ) {
862       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
863       jj++;
864     }
865   }
866 
867   /* create our matrix */
868   for ( i=0; i<m; i++ ) {
869     ourlens[i] -= offlens[i];
870   }
871   if (type == MATMPIDENSE) {
872     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
873   }
874   A = *newmat;
875   MatSetOption(A,COLUMNS_SORTED);
876   for ( i=0; i<m; i++ ) {
877     ourlens[i] += offlens[i];
878   }
879 
880   if (!rank) {
881     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
882 
883     /* read in my part of the matrix numerical values  */
884     nz = procsnz[0];
885     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
886 
887     /* insert into matrix */
888     jj      = rstart;
889     smycols = mycols;
890     svals   = vals;
891     for ( i=0; i<m; i++ ) {
892       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
893       smycols += ourlens[i];
894       svals   += ourlens[i];
895       jj++;
896     }
897 
898     /* read in the other processors' portions and ship them out */
899     for ( i=1; i<size; i++ ) {
900       nz = procsnz[i];
901       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
902       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
903     }
904     PETSCFREE(procsnz);
905   }
906   else {
907     /* receive numeric values */
908     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
909 
910     /* receive message of values*/
911     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
912     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
913     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
914 
915     /* insert into matrix */
916     jj      = rstart;
917     smycols = mycols;
918     svals   = vals;
919     for ( i=0; i<m; i++ ) {
920       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
921       smycols += ourlens[i];
922       svals   += ourlens[i];
923       jj++;
924     }
925   }
926   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
927 
928   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
930   return 0;
931 }
932