xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6c35f2bdf7f193ed61e45ab3dff1988b03a68b29)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: mpiaij.c,v 1.163 1996/08/15 12:47:27 bsmith Exp bsmith $";
4 #endif
5 
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the number of global columns.
14 */
15 static int CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
18   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
19   int        n = B->n,i;
20 
21   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22   PLogObjectMemory(mat,aij->N*sizeof(int));
23   PetscMemzero(aij->colmap,aij->N*sizeof(int));
24   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
25   return 0;
26 }
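
/*
   A minimal sketch of how the colmap built above is looked up (the garray
   values here are hypothetical): suppose garray = {3,7}, so local
   off-diagonal columns 0 and 1 correspond to global columns 3 and 7.  Then
   colmap is all zeros except colmap[3] = 1 and colmap[7] = 2, and a lookup

      col = aij->colmap[gcol] - 1;

   yields the local column, or a negative value when global column gcol is
   not present in the off-diagonal part; the +1/-1 offset lets zero double
   as "not present".
*/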
27 
28 extern int DisAssemble_MPIAIJ(Mat);
29 
30 static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
31 {
32   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33   int        ierr;
34   if (aij->size == 1) {
35     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
36   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
37   return 0;
38 }
39 
40 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
41 {
42   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43   Scalar     value;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        roworiented = aij->roworiented;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (im[i] >= rstart && im[i] < rend) {
56       row = im[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (in[j] >= cstart && in[j] < cend){
61           col = in[j] - cstart;
62           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
63           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
64         }
65         else {
66           if (mat->was_assembled) {
67             if (!aij->colmap) {
68               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
69             }
70             col = aij->colmap[in[j]] - 1;
71             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
72               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
73               col =  in[j];
74             }
75           }
76           else col = in[j];
77           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
78           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
79         }
80       }
81     }
82     else {
83       if (roworiented) {
84         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
85       }
86       else {
87         row = im[i];
88         for ( j=0; j<n; j++ ) {
89           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
90         }
91       }
92     }
93   }
94   return 0;
95 }
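
/*
   A small worked example of the routing above (the layout is hypothetical):
   on two processes, with rows 0-1 and columns 0-1 owned by process 0 and
   rows 2-3 and columns 2-3 by process 1, a call on process 0 such as

      int    row = 0, col = 3;
      Scalar v   = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);

   lands in aij->B (local row, off-process column); row = 0, col = 1 goes
   straight into aij->A; and row = 3 with any column is stashed and shipped
   to process 1 during assembly.
*/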
96 
97 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
98 {
99   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
100   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
101   int        cstart = aij->cstart, cend = aij->cend,row,col;
102 
103   for ( i=0; i<m; i++ ) {
104     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106     if (idxm[i] >= rstart && idxm[i] < rend) {
107       row = idxm[i] - rstart;
108       for ( j=0; j<n; j++ ) {
109         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111         if (idxn[j] >= cstart && idxn[j] < cend){
112           col = idxn[j] - cstart;
113           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114         }
115         else {
116           if (!aij->colmap) {
117             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
118           }
119           col = aij->colmap[idxn[j]] - 1;
120           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
121           else {
122             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
123           }
124         }
125       }
126     }
127     else {
128       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
129     }
130   }
131   return 0;
132 }
133 
134 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
135 {
136   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
137   MPI_Comm    comm = mat->comm;
138   int         size = aij->size, *owners = aij->rowners;
139   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
140   MPI_Request *send_waits,*recv_waits;
141   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
142   InsertMode  addv;
143   Scalar      *rvalues,*svalues;
144 
145   /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES) */
146   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
147   if (addv == (ADD_VALUES|INSERT_VALUES)) {
148     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
149   }
150   aij->insertmode = addv; /* in case this processor had no cache */
151 
152   /*  first count number of contributors to each processor */
153   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
154   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
155   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
156   for ( i=0; i<aij->stash.n; i++ ) {
157     idx = aij->stash.idx[i];
158     for ( j=0; j<size; j++ ) {
159       if (idx >= owners[j] && idx < owners[j+1]) {
160         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
161       }
162     }
163   }
164   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
165 
166   /* inform other processors of number of messages and max length*/
167   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
168   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
169   nreceives = work[rank];
170   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
171   nmax = work[rank];
172   PetscFree(work);
173 
174   /* post receives:
175        1) each message will consist of ordered triples
176      (global row index, global column index, value); we store the
177      indices as Scalars to simplify the message passing.
178        2) since we don't know how long each individual message is we
179      allocate the largest needed buffer for each receive. Potentially
180      this is a lot of wasted space.
181 
182 
183        This could be done better.
184   */
185   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
186   CHKPTRQ(rvalues);
187   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
188   CHKPTRQ(recv_waits);
189   for ( i=0; i<nreceives; i++ ) {
190     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
191               comm,recv_waits+i);
192   }
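
  /* the message layout assumed by these receives (and by the unpacking in
     MatAssemblyEnd_MPIAIJ below) is a flat run of Scalar triples
         [ row0, col0, val0, row1, col1, val1, ... ]
     with the global indices cast to Scalar, hence the factor of 3 in the
     buffer sizes and the division by 3 after MPI_Get_count() */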
193 
194   /* do sends:
195       1) starts[i] gives the starting index in svalues for stuff going to
196          the ith processor
197   */
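  /* for example (hypothetical counts): with size = 3 and nprocs = {2,0,4},
     the prefix sum below gives starts = {0,2,2}, so the two triples bound
     for process 0 occupy svalues[0..5] and the four bound for process 2
     begin at svalues[6]; starts[] is then rebuilt because the packing loop
     increments it in place */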
198   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
199   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
200   CHKPTRQ(send_waits);
201   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
202   starts[0] = 0;
203   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
204   for ( i=0; i<aij->stash.n; i++ ) {
205     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
206     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
207     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
208   }
209   PetscFree(owner);
210   starts[0] = 0;
211   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
212   count = 0;
213   for ( i=0; i<size; i++ ) {
214     if (procs[i]) {
215       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
216                 comm,send_waits+count++);
217     }
218   }
219   PetscFree(starts); PetscFree(nprocs);
220 
221   /* Free cache space */
222   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
223   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
224 
225   aij->svalues    = svalues;    aij->rvalues    = rvalues;
226   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
227   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
228   aij->rmax       = nmax;
229 
230   return 0;
231 }
232 extern int MatSetUpMultiply_MPIAIJ(Mat);
233 
234 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
235 {
236   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
237   MPI_Status  *send_status,recv_status;
238   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
239   int         row,col,other_disassembled;
240   Scalar      *values,val;
241   InsertMode  addv = aij->insertmode;
242 
243   /*  wait on receives */
244   while (count) {
245     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
246     /* unpack receives into our local space */
247     values = aij->rvalues + 3*imdex*aij->rmax;
248     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
249     n = n/3;
250     for ( i=0; i<n; i++ ) {
251       row = (int) PetscReal(values[3*i]) - aij->rstart;
252       col = (int) PetscReal(values[3*i+1]);
253       val = values[3*i+2];
254       if (col >= aij->cstart && col < aij->cend) {
255         col -= aij->cstart;
256         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
257       }
258       else {
259         if (mat->was_assembled) {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263           col = aij->colmap[col] - 1;
264           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
265             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
266             col = (int) PetscReal(values[3*i+1]);
267           }
268         }
269         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
270       }
271     }
272     count--;
273   }
274   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
275 
276   /* wait on sends */
277   if (aij->nsends) {
278     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
279     CHKPTRQ(send_status);
280     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
281     PetscFree(send_status);
282   }
283   PetscFree(aij->send_waits); PetscFree(aij->svalues);
284 
285   aij->insertmode = NOT_SET_VALUES;
286   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
287   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
288 
289   /* determine if any processor has disassembled; if so we must
290      also disassemble ourselves, so that we may all reassemble. */
291   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
292   if (mat->was_assembled && !other_disassembled) {
293     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
294   }
295 
296   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
297     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
298   }
299   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
300   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
301 
302   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
303   return 0;
304 }
305 
306 static int MatZeroEntries_MPIAIJ(Mat A)
307 {
308   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
309   int        ierr;
310   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
311   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
312   return 0;
313 }
314 
315 /* the code does not do the diagonal entries correctly unless the
316    matrix is square and the row and column ownerships are identical.
317    This is a BUG. The only way to fix it seems to be to access
318    aij->A and aij->B directly and not through the MatZeroRows()
319    routine.
320 */
321 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
322 {
323   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
324   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
325   int            *procs,*nprocs,j,found,idx,nsends,*work;
326   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
327   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
328   int            *lens,imdex,*lrows,*values;
329   MPI_Comm       comm = A->comm;
330   MPI_Request    *send_waits,*recv_waits;
331   MPI_Status     recv_status,*send_status;
332   IS             istmp;
333 
334   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
335   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
336 
337   /*  first count number of contributors to each processor */
338   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
339   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
340   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
341   for ( i=0; i<N; i++ ) {
342     idx = rows[i];
343     found = 0;
344     for ( j=0; j<size; j++ ) {
345       if (idx >= owners[j] && idx < owners[j+1]) {
346         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
347       }
348     }
349     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
350   }
351   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
352 
353   /* inform other processors of number of messages and max length*/
354   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
355   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
356   nrecvs = work[rank];
357   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
358   nmax = work[rank];
359   PetscFree(work);
360 
361   /* post receives:   */
362   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
363   CHKPTRQ(rvalues);
364   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
365   CHKPTRQ(recv_waits);
366   for ( i=0; i<nrecvs; i++ ) {
367     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
368   }
369 
370   /* do sends:
371       1) starts[i] gives the starting index in svalues for stuff going to
372          the ith processor
373   */
374   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
375   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
376   CHKPTRQ(send_waits);
377   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
378   starts[0] = 0;
379   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
380   for ( i=0; i<N; i++ ) {
381     svalues[starts[owner[i]]++] = rows[i];
382   }
383   ISRestoreIndices(is,&rows);
384 
385   starts[0] = 0;
386   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
387   count = 0;
388   for ( i=0; i<size; i++ ) {
389     if (procs[i]) {
390       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
391     }
392   }
393   PetscFree(starts);
394 
395   base = owners[rank];
396 
397   /*  wait on receives */
398   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
399   source = lens + nrecvs;
400   count  = nrecvs; slen = 0;
401   while (count) {
402     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
403     /* unpack receives into our local space */
404     MPI_Get_count(&recv_status,MPI_INT,&n);
405     source[imdex]  = recv_status.MPI_SOURCE;
406     lens[imdex]  = n;
407     slen += n;
408     count--;
409   }
410   PetscFree(recv_waits);
411 
412   /* move the data into the send scatter */
413   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
414   count = 0;
415   for ( i=0; i<nrecvs; i++ ) {
416     values = rvalues + i*nmax;
417     for ( j=0; j<lens[i]; j++ ) {
418       lrows[count++] = values[j] - base;
419     }
420   }
421   PetscFree(rvalues); PetscFree(lens);
422   PetscFree(owner); PetscFree(nprocs);
423 
424   /* actually zap the local rows */
425   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
426   PLogObjectParent(A,istmp);
427   PetscFree(lrows);
428   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
429   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
430   ierr = ISDestroy(istmp); CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
435     CHKPTRQ(send_status);
436     MPI_Waitall(nsends,send_waits,send_status);
437     PetscFree(send_status);
438   }
439   PetscFree(send_waits); PetscFree(svalues);
440 
441   return 0;
442 }
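
/*
   A sketch of the exchange above (the ownership is hypothetical): with two
   processes owning rows [0,3) and [3,6), a call on process 1 listing rows
   {0,5} ships row 0 to process 0 and row 5 to itself; each process then
   zeros the rows it received locally, placing diag (if given) through the
   l->A call and always 0.0 through the l->B call, subject to the caveat
   noted above this routine.
*/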
443 
444 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
445 {
446   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
447   int        ierr,nt;
448 
449   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
450   if (nt != a->n) {
451     SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
452   }
453   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
454   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
455   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
456   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
457   return 0;
458 }
459 
460 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
463   int        ierr;
464   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
465   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
466   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
467   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
468   return 0;
469 }
470 
471 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
472 {
473   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
474   int        ierr;
475 
476   /* do nondiagonal part */
477   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
478   /* send it on its way */
479   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
480   /* do local part */
481   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
482   /* receive remote parts: note this assumes the values are not actually */
483   /* inserted into yy until the next line, which is true for this        */
484   /* implementation but may not always be true.                          */
485   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
486   return 0;
487 }
488 
489 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
492   int        ierr;
493 
494   /* do nondiagonal part */
495   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
496   /* send it on its way */
497   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
498   /* do local part */
499   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
500   /* receive remote parts: note this assumes the values are not actually */
501   /* inserted into zz until the next line, which is true for this        */
502   /* implementation but may not always be true.                          */
503   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
504                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
505   return 0;
506 }
507 
508 /*
509   This only works correctly for square matrices where the subblock A->A is the
510    diagonal block
511 */
512 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
513 {
514   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
515   if (a->M != a->N)
516     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
517   if (a->rstart != a->cstart || a->rend != a->cend) {
518     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
519   return MatGetDiagonal(a->A,v);
520 }
521 
522 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
523 {
524   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
525   int        ierr;
526   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
527   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
528   return 0;
529 }
530 
531 static int MatDestroy_MPIAIJ(PetscObject obj)
532 {
533   Mat        mat = (Mat) obj;
534   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
535   int        ierr;
536 #if defined(PETSC_LOG)
537   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
538 #endif
539   PetscFree(aij->rowners);
540   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
541   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
542   if (aij->colmap) PetscFree(aij->colmap);
543   if (aij->garray) PetscFree(aij->garray);
544   if (aij->lvec)   VecDestroy(aij->lvec);
545   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
546   if (aij->rowvalues) PetscFree(aij->rowvalues);
547   PetscFree(aij);
548   PLogObjectDestroy(mat);
549   PetscHeaderDestroy(mat);
550   return 0;
551 }
552 #include "draw.h"
553 #include "pinclude/pviewer.h"
554 
555 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
556 {
557   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
558   int         ierr;
559 
560   if (aij->size == 1) {
561     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
562   }
563   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
564   return 0;
565 }
566 
567 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
568 {
569   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
570   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
571   int         ierr, format,shift = C->indexshift,rank;
572   FILE        *fd;
573   ViewerType  vtype;
574 
575   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
576   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
577     ierr = ViewerGetFormat(viewer,&format);
578     if (format == ASCII_FORMAT_INFO_DETAILED) {
579       int nz, nzalloc, mem, flg;
580       MPI_Comm_rank(mat->comm,&rank);
581       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
582       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
583       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
584       PetscSequentialPhaseBegin(mat->comm,1);
585       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
586          rank,aij->m,nz,nzalloc,mem);
587       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
588          rank,aij->m,nz,nzalloc,mem);
589       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
590       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
591       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
592       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
593       fflush(fd);
594       PetscSequentialPhaseEnd(mat->comm,1);
595       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
596       return 0;
597     }
598     else if (format == ASCII_FORMAT_INFO) {
599       return 0;
600     }
601   }
602 
603   if (vtype == DRAW_VIEWER) {
604     Draw       draw;
605     PetscTruth isnull;
606     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
607     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
608   }
609 
610   if (vtype == ASCII_FILE_VIEWER) {
611     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
612     PetscSequentialPhaseBegin(mat->comm,1);
613     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
614            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
615            aij->cend);
616     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
617     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
618     fflush(fd);
619     PetscSequentialPhaseEnd(mat->comm,1);
620   }
621   else {
622     int size = aij->size;
623     rank = aij->rank;
624     if (size == 1) {
625       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
626     }
627     else {
628       /* assemble the entire matrix onto first processor. */
629       Mat         A;
630       Mat_SeqAIJ *Aloc;
631       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
632       Scalar      *a;
633 
634       if (!rank) {
635         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
636                CHKERRQ(ierr);
637       }
638       else {
639         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
640                CHKERRQ(ierr);
641       }
642       PLogObjectParent(mat,A);
643 
644       /* copy over the A part */
645       Aloc = (Mat_SeqAIJ*) aij->A->data;
646       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
647       row = aij->rstart;
648       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
649       for ( i=0; i<m; i++ ) {
650         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
651         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
652       }
653       aj = Aloc->j;
654       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
655 
656       /* copy over the B part */
657       Aloc = (Mat_SeqAIJ*) aij->B->data;
658       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
659       row = aij->rstart;
660       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
661       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
662       for ( i=0; i<m; i++ ) {
663         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
664         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
665       }
666       PetscFree(ct);
667       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
668       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
669       if (!rank) {
670         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
671       }
672       ierr = MatDestroy(A); CHKERRQ(ierr);
673     }
674   }
675   return 0;
676 }
677 
678 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
679 {
680   Mat         mat = (Mat) obj;
681   int         ierr;
682   ViewerType  vtype;
683 
684   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
685   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
686       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
687     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
688   }
689   else if (vtype == BINARY_FILE_VIEWER) {
690     return MatView_MPIAIJ_Binary(mat,viewer);
691   }
692   return 0;
693 }
694 
695 /*
696     This has to provide several versions.
697 
698      2) a) use only local smoothing, updating outer values only once.
699         b) local smoothing, updating outer values each inner iteration.
700      3) coloring, updating outer values between colors.
701 */
702 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
703                            double fshift,int its,Vec xx)
704 {
705   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
706   Mat        AA = mat->A, BB = mat->B;
707   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
708   Scalar     *b,*x,*xs,*ls,d,*v,sum;
709   int        ierr,*idx, *diag;
710   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
711 
712   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
713   xs = x + shift; /* shift by one for index start of 1 */
714   ls = ls + shift;
715   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
716   diag = A->diag;
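  /*
     Each sweep below applies the local SOR update

        x_i <- (1-omega) x_i + omega (b_i - sum_{j!=i} a_ij x_j)/d,  d = a_ii + fshift

     assuming the SPARSEDENSEMDOT macro subtracts the sparse row dot product
     from sum: after both macro calls sum = b_i - sum_j a_ij x_j (diagonal
     term included), so sum/d + x[i] restores the diagonal contribution
     exactly when fshift is zero.
  */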
717   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
718     if (flag & SOR_ZERO_INITIAL_GUESS) {
719       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
720     }
721     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
722     CHKERRQ(ierr);
723     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
724     CHKERRQ(ierr);
725     while (its--) {
726       /* go down through the rows */
727       for ( i=0; i<m; i++ ) {
728         n    = A->i[i+1] - A->i[i];
729         idx  = A->j + A->i[i] + shift;
730         v    = A->a + A->i[i] + shift;
731         sum  = b[i];
732         SPARSEDENSEMDOT(sum,xs,v,idx,n);
733         d    = fshift + A->a[diag[i]+shift];
734         n    = B->i[i+1] - B->i[i];
735         idx  = B->j + B->i[i] + shift;
736         v    = B->a + B->i[i] + shift;
737         SPARSEDENSEMDOT(sum,ls,v,idx,n);
738         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
739       }
740       /* come up through the rows */
741       for ( i=m-1; i>-1; i-- ) {
742         n    = A->i[i+1] - A->i[i];
743         idx  = A->j + A->i[i] + shift;
744         v    = A->a + A->i[i] + shift;
745         sum  = b[i];
746         SPARSEDENSEMDOT(sum,xs,v,idx,n);
747         d    = fshift + A->a[diag[i]+shift];
748         n    = B->i[i+1] - B->i[i];
749         idx  = B->j + B->i[i] + shift;
750         v    = B->a + B->i[i] + shift;
751         SPARSEDENSEMDOT(sum,ls,v,idx,n);
752         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
753       }
754     }
755   }
756   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
757     if (flag & SOR_ZERO_INITIAL_GUESS) {
758       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
759     }
760     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
761     CHKERRQ(ierr);
762     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
763     CHKERRQ(ierr);
764     while (its--) {
765       for ( i=0; i<m; i++ ) {
766         n    = A->i[i+1] - A->i[i];
767         idx  = A->j + A->i[i] + shift;
768         v    = A->a + A->i[i] + shift;
769         sum  = b[i];
770         SPARSEDENSEMDOT(sum,xs,v,idx,n);
771         d    = fshift + A->a[diag[i]+shift];
772         n    = B->i[i+1] - B->i[i];
773         idx  = B->j + B->i[i] + shift;
774         v    = B->a + B->i[i] + shift;
775         SPARSEDENSEMDOT(sum,ls,v,idx,n);
776         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
777       }
778     }
779   }
780   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
781     if (flag & SOR_ZERO_INITIAL_GUESS) {
782       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
783     }
784     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
785                             mat->Mvctx); CHKERRQ(ierr);
786     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
787                             mat->Mvctx); CHKERRQ(ierr);
788     while (its--) {
789       for ( i=m-1; i>-1; i-- ) {
790         n    = A->i[i+1] - A->i[i];
791         idx  = A->j + A->i[i] + shift;
792         v    = A->a + A->i[i] + shift;
793         sum  = b[i];
794         SPARSEDENSEMDOT(sum,xs,v,idx,n);
795         d    = fshift + A->a[diag[i]+shift];
796         n    = B->i[i+1] - B->i[i];
797         idx  = B->j + B->i[i] + shift;
798         v    = B->a + B->i[i] + shift;
799         SPARSEDENSEMDOT(sum,ls,v,idx,n);
800         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
801       }
802     }
803   }
804   else {
805     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
806   }
807   return 0;
808 }
809 
810 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
811                              int *nzalloc,int *mem)
812 {
813   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
814   Mat        A = mat->A, B = mat->B;
815   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
816 
817   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
818   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
819   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
820   if (flag == MAT_LOCAL) {
821     if (nz)       *nz      = isend[0];
822     if (nzalloc)  *nzalloc = isend[1];
823     if (mem)      *mem     = isend[2];
824   } else if (flag == MAT_GLOBAL_MAX) {
825     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
826     if (nz)      *nz      = irecv[0];
827     if (nzalloc) *nzalloc = irecv[1];
828     if (mem)     *mem     = irecv[2];
829   } else if (flag == MAT_GLOBAL_SUM) {
830     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
831     if (nz)      *nz      = irecv[0];
832     if (nzalloc) *nzalloc = irecv[1];
833     if (mem)     *mem     = irecv[2];
834   }
835   return 0;
836 }
837 
838 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
839 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
840 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
841 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
842 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
843 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
844 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
845 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
846 
847 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
848 {
849   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
850 
851   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
852       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
853       op == MAT_COLUMNS_SORTED ||
854       op == MAT_ROW_ORIENTED) {
855         MatSetOption(a->A,op);
856         MatSetOption(a->B,op);
857   }
858   else if (op == MAT_ROWS_SORTED ||
859            op == MAT_SYMMETRIC ||
860            op == MAT_STRUCTURALLY_SYMMETRIC ||
861            op == MAT_YES_NEW_DIAGONALS)
862     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
863   else if (op == MAT_COLUMN_ORIENTED) {
864     a->roworiented = 0;
865     MatSetOption(a->A,op);
866     MatSetOption(a->B,op);
867   }
868   else if (op == MAT_NO_NEW_DIAGONALS)
869     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
870   else
871     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
872   return 0;
873 }
874 
875 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
876 {
877   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
878   *m = mat->M; *n = mat->N;
879   return 0;
880 }
881 
882 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
883 {
884   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
885   *m = mat->m; *n = mat->n;
886   return 0;
887 }
888 
889 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
890 {
891   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
892   *m = mat->rstart; *n = mat->rend;
893   return 0;
894 }
895 
896 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
897 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
898 
899 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
900 {
901   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
902   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
903   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
904   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
905   int        *cmap, *idx_p;
906 
907   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
908   mat->getrowactive = PETSC_TRUE;
909 
910   if (!mat->rowvalues && (idx || v)) {
911     /*
912         allocate enough space to hold information from the longest row.
913     */
914     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
915     int     max = 1,m = mat->m,tmp;
916     for ( i=0; i<m; i++ ) {
917       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
918       if (max < tmp) { max = tmp; }
919     }
920     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
921     CHKPTRQ(mat->rowvalues);
922     mat->rowindices = (int *) (mat->rowvalues + max);
923   }
924 
925   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
926   lrow = row - rstart;
927 
928   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
929   if (!v)   {pvA = 0; pvB = 0;}
930   if (!idx) {pcA = 0; if (!v) pcB = 0;}
931   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
932   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
933   nztot = nzA + nzB;
934 
935   cmap  = mat->garray;
936   if (v  || idx) {
937     if (nztot) {
938       /* Sort by increasing column numbers, assuming A and B already sorted */
939       int imark = -1;
940       if (v) {
941         *v = v_p = mat->rowvalues;
942         for ( i=0; i<nzB; i++ ) {
943           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
944           else break;
945         }
946         imark = i;
947         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
948         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
949       }
950       if (idx) {
951         *idx = idx_p = mat->rowindices;
952         if (imark > -1) {
953           for ( i=0; i<imark; i++ ) {
954             idx_p[i] = cmap[cworkB[i]];
955           }
956         } else {
957           for ( i=0; i<nzB; i++ ) {
958             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
959             else break;
960           }
961           imark = i;
962         }
963         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
964         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
965       }
966     }
967     else {
968       if (idx) *idx = 0;
969       if (v)   *v   = 0;
970     }
971   }
972   *nz = nztot;
973   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
974   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
975   return 0;
976 }
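
/*
   A sketch of the three-way merge above (the column values are
   hypothetical): with cstart = 4, diagonal-part columns {0,2} (global 4
   and 6) and off-diagonal columns mapping through cmap to {1,9}, the loops
   copy the B entries with cmap[] < cstart first (global 1, imark = 1),
   then all of A (4 and 6), then the remaining B entries (9), producing
   the globally sorted row {1,4,6,9}.
*/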
977 
978 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
979 {
980   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
981   if (aij->getrowactive == PETSC_FALSE) {
982     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
983   }
984   aij->getrowactive = PETSC_FALSE;
985   return 0;
986 }
987 
988 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
989 {
990   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
991   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
992   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
993   double     sum = 0.0;
994   Scalar     *v;
995 
996   if (aij->size == 1) {
997     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
998   } else {
999     if (type == NORM_FROBENIUS) {
1000       v = amat->a;
1001       for (i=0; i<amat->nz; i++ ) {
1002 #if defined(PETSC_COMPLEX)
1003         sum += real(conj(*v)*(*v)); v++;
1004 #else
1005         sum += (*v)*(*v); v++;
1006 #endif
1007       }
1008       v = bmat->a;
1009       for (i=0; i<bmat->nz; i++ ) {
1010 #if defined(PETSC_COMPLEX)
1011         sum += real(conj(*v)*(*v)); v++;
1012 #else
1013         sum += (*v)*(*v); v++;
1014 #endif
1015       }
1016       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1017       *norm = sqrt(*norm);
1018     }
1019     else if (type == NORM_1) { /* max column norm */
1020       double *tmp, *tmp2;
1021       int    *jj, *garray = aij->garray;
1022       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1023       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1024       PetscMemzero(tmp,aij->N*sizeof(double));
1025       *norm = 0.0;
1026       v = amat->a; jj = amat->j;
1027       for ( j=0; j<amat->nz; j++ ) {
1028         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1029       }
1030       v = bmat->a; jj = bmat->j;
1031       for ( j=0; j<bmat->nz; j++ ) {
1032         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1033       }
1034       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1035       for ( j=0; j<aij->N; j++ ) {
1036         if (tmp2[j] > *norm) *norm = tmp2[j];
1037       }
1038       PetscFree(tmp); PetscFree(tmp2);
1039     }
1040     else if (type == NORM_INFINITY) { /* max row norm */
1041       double ntemp = 0.0;
1042       for ( j=0; j<amat->m; j++ ) {
1043         v = amat->a + amat->i[j] + shift;
1044         sum = 0.0;
1045         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1046           sum += PetscAbsScalar(*v); v++;
1047         }
1048         v = bmat->a + bmat->i[j] + shift;
1049         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1050           sum += PetscAbsScalar(*v); v++;
1051         }
1052         if (sum > ntemp) ntemp = sum;
1053       }
1054       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1055     }
1056     else {
1057       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1058     }
1059   }
1060   return 0;
1061 }
1062 
1063 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1064 {
1065   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1066   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1067   int        ierr,shift = Aloc->indexshift;
1068   Mat        B;
1069   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1070   Scalar     *array;
1071 
1072   if (matout == PETSC_NULL && M != N)
1073     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1074   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1075          PETSC_NULL,&B); CHKERRQ(ierr);
1076 
1077   /* copy over the A part */
1078   Aloc = (Mat_SeqAIJ*) a->A->data;
1079   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1080   row = a->rstart;
1081   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1082   for ( i=0; i<m; i++ ) {
1083     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1084     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1085   }
1086   aj = Aloc->j;
1087   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1088 
1089   /* copy over the B part */
1090   Aloc = (Mat_SeqAIJ*) a->B->data;
1091   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1092   row = a->rstart;
1093   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1094   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1095   for ( i=0; i<m; i++ ) {
1096     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1097     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1098   }
1099   PetscFree(ct);
1100   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1101   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1102   if (matout != PETSC_NULL) {
1103     *matout = B;
1104   } else {
1105     /* This isn't really an in-place transpose, but we free the data structures of a */
1106     PetscFree(a->rowners);
1107     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1108     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1109     if (a->colmap) PetscFree(a->colmap);
1110     if (a->garray) PetscFree(a->garray);
1111     if (a->lvec) VecDestroy(a->lvec);
1112     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1113     PetscFree(a);
1114     PetscMemcpy(A,B,sizeof(struct _Mat));
1115     PetscHeaderDestroy(B);
1116   }
1117   return 0;
1118 }
1119 
1120 extern int MatPrintHelp_SeqAIJ(Mat);
1121 static int MatPrintHelp_MPIAIJ(Mat A)
1122 {
1123   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1124 
1125   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1126   else return 0;
1127 }
1128 
1129 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1130 {
1131   *bs = 1;
1132   return 0;
1133 }
1134 
1135 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1136 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1137 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1138 int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1139 /* -------------------------------------------------------------------*/
1140 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1141        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1142        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1143        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1144        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1145        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1146        MatLUFactor_MPIAIJ,0,
1147        MatRelax_MPIAIJ,
1148        MatTranspose_MPIAIJ,
1149        MatGetInfo_MPIAIJ,0,
1150        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1151        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1152        0,
1153        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1154        MatGetReordering_MPIAIJ,
1155        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1156        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1157        MatILUFactorSymbolic_MPIAIJ,0,
1158        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1159        0,0,0,
1160        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1161        MatPrintHelp_MPIAIJ,
1162        MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ};
1163 
1164 /*@C
1165    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1166    (the default parallel PETSc format).  For good matrix assembly performance
1167    the user should preallocate the matrix storage by setting the parameters
1168    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1169    performance can be increased by more than a factor of 50.
1170 
1171    Input Parameters:
1172 .  comm - MPI communicator
1173 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1174            This value should be the same as the local size used in creating the
1175            y vector for the matrix-vector product y = Ax.
1176 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1177            This value should be the same as the local size used in creating the
1178            x vector for the matrix-vector product y = Ax.
1179 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1180 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1181 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1182            (same for all local rows)
1183 .  d_nnz - array containing the number of nonzeros in the various rows of the
1184            diagonal portion of the local submatrix (possibly different for each row)
1185            or PETSC_NULL. You must leave room for the diagonal entry even if
1186            it is zero.
1187 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1188            submatrix (same for all local rows).
1189 .  o_nnz - array containing the number of nonzeros in the various rows of the
1190            off-diagonal portion of the local submatrix (possibly different for
1191            each row) or PETSC_NULL.
1192 
1193    Output Parameter:
1194 .  A - the matrix
1195 
1196    Notes:
1197    The AIJ format (also called the Yale sparse matrix format or
1198    compressed row storage), is fully compatible with standard Fortran 77
1199    storage.  That is, the stored row and column indices can begin at
1200    either one (as in Fortran) or zero.  See the users manual for details.
1201 
1202    The user MUST specify either the local or global matrix dimensions
1203    (possibly both).
1204 
1205    By default, this format uses inodes (identical nodes) when possible.
1206    We search for consecutive rows with the same nonzero structure, thereby
1207    reusing matrix information to achieve increased efficiency.
1208 
1209    Options Database Keys:
1210 $    -mat_aij_no_inode  - Do not use inodes
1211 $    -mat_aij_inode_limit <limit> - Set inode limit.
1212 $        (max limit=5)
1213 $    -mat_aij_oneindex - Internally use indexing starting at 1
1214 $        rather than 0.  Note: When calling MatSetValues(),
1215 $        the user still MUST index entries starting at 0!
1216 
1217    Storage Information:
1218    For a square global matrix we define each processor's diagonal portion
1219    to be its local rows and the corresponding columns (a square submatrix);
1220    each processor's off-diagonal portion encompasses the remainder of the
1221    local matrix (a rectangular submatrix).
1222 
1223    The user can specify preallocated storage for the diagonal part of
1224    the local submatrix with either d_nz or d_nnz (not both).  Set
1225    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1226    memory allocation.  Likewise, specify preallocated storage for the
1227    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1228 
1229    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1230    the figure below we depict these three local rows and all columns (0-11).
1231 
1232 $          0 1 2 3 4 5 6 7 8 9 10 11
1233 $         --------------------------
1234 $  row 3  |  o o o d d d o o o o o o
1235 $  row 4  |  o o o d d d o o o o o o
1236 $  row 5  |  o o o d d d o o o o o o
1237 $         --------------------------
1238 $
1239 
1240    Thus, any entries in the d locations are stored in the d (diagonal)
1241    submatrix, and any entries in the o locations are stored in the
1242    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1243    stored simply in the MATSEQAIJ format for compressed row storage.
1244 
1245    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1246    and o_nz should indicate the number of nonzeros per row in the o matrix.
1247    In general, for PDE problems in which most nonzeros are near the diagonal,
1248    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1249    or you will get TERRIBLE performance; see the users' manual chapter on
1250    matrices and the file $(PETSC_DIR)/Performance.
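
   Example Usage:
   A minimal sketch (the sizes and layout are illustrative only): on four
   processes, each owning 3 rows and 3 columns of a 12 x 12 matrix such as
   the one depicted above, with at most 3 nonzeros per row in the diagonal
   block and 9 in the off-diagonal block, one could preallocate via

$      Mat A;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,3,3,PETSC_DECIDE,PETSC_DECIDE,
$                             3,PETSC_NULL,9,PETSC_NULL,&A); CHKERRQ(ierr);

   followed by the usual MatSetValues(), MatAssemblyBegin(), and
   MatAssemblyEnd() calls.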
1251 
1252 .keywords: matrix, aij, compressed row, sparse, parallel
1253 
1254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1255 @*/
1256 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1257                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1258 {
1259   Mat          B;
1260   Mat_MPIAIJ   *b;
1261   int          ierr, i,sum[2],work[2];
1262 
1263   *A = 0;
1264   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1265   PLogObjectCreate(B);
1266   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1267   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1268   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1269   B->destroy    = MatDestroy_MPIAIJ;
1270   B->view       = MatView_MPIAIJ;
1271   B->factor     = 0;
1272   B->assembled  = PETSC_FALSE;
1273 
1274   b->insertmode = NOT_SET_VALUES;
1275   MPI_Comm_rank(comm,&b->rank);
1276   MPI_Comm_size(comm,&b->size);
1277 
1278   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1279     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1280 
1281   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1282     work[0] = m; work[1] = n;
1283     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1284     if (M == PETSC_DECIDE) M = sum[0];
1285     if (N == PETSC_DECIDE) N = sum[1];
1286   }
1287   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1288   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1289   b->m = m; B->m = m;
1290   b->n = n; B->n = n;
1291   b->N = N; B->N = N;
1292   b->M = M; B->M = M;
1293 
1294   /* build local table of row and column ownerships */
1295   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1296   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1297   b->cowners = b->rowners + b->size + 2;
1298   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1299   b->rowners[0] = 0;
1300   for ( i=2; i<=b->size; i++ ) {
1301     b->rowners[i] += b->rowners[i-1];
1302   }
1303   b->rstart = b->rowners[b->rank];
1304   b->rend   = b->rowners[b->rank+1];
1305   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1306   b->cowners[0] = 0;
1307   for ( i=2; i<=b->size; i++ ) {
1308     b->cowners[i] += b->cowners[i-1];
1309   }
1310   b->cstart = b->cowners[b->rank];
1311   b->cend   = b->cowners[b->rank+1];
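
  /* for example (hypothetical sizes): with three processes contributing
     local row counts m = {2,3,2}, the Allgather and running sum above give
     rowners = {0,2,5,7}, so process 1 owns rows [2,5) with rstart = 2 and
     rend = 5; cowners is built the same way from the local column sizes */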
1312 
1313   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1314   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1315   PLogObjectParent(B,b->A);
1316   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1317   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1318   PLogObjectParent(B,b->B);
1319 
1320   /* build cache for off array entries formed */
1321   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1322   b->colmap      = 0;
1323   b->garray      = 0;
1324   b->roworiented = 1;
1325 
1326   /* stuff used for matrix vector multiply */
1327   b->lvec      = 0;
1328   b->Mvctx     = 0;
1329 
1330   /* stuff for MatGetRow() */
1331   b->rowindices   = 0;
1332   b->rowvalues    = 0;
1333   b->getrowactive = PETSC_FALSE;
1334 
1335   *A = B;
1336   return 0;
1337 }
1338 
1339 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1340 {
1341   Mat        mat;
1342   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1343   int        ierr, len=0, flg;
1344 
1345   *newmat       = 0;
1346   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1347   PLogObjectCreate(mat);
1348   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1349   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1350   mat->destroy    = MatDestroy_MPIAIJ;
1351   mat->view       = MatView_MPIAIJ;
1352   mat->factor     = matin->factor;
1353   mat->assembled  = PETSC_TRUE;
1354 
1355   a->m = mat->m   = oldmat->m;
1356   a->n = mat->n   = oldmat->n;
1357   a->M = mat->M   = oldmat->M;
1358   a->N = mat->N   = oldmat->N;
1359 
1360   a->rstart       = oldmat->rstart;
1361   a->rend         = oldmat->rend;
1362   a->cstart       = oldmat->cstart;
1363   a->cend         = oldmat->cend;
1364   a->size         = oldmat->size;
1365   a->rank         = oldmat->rank;
1366   a->insertmode   = NOT_SET_VALUES;
1367   a->rowvalues    = 0;
1368   a->getrowactive = PETSC_FALSE;
1369 
1370   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1371   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1372   a->cowners = a->rowners + a->size + 2;
1373   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1374   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1375   if (oldmat->colmap) {
1376     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1377     PLogObjectMemory(mat,(a->N)*sizeof(int));
1378     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1379   } else a->colmap = 0;
1380   if (oldmat->garray) {
1381     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1382     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1383     PLogObjectMemory(mat,len*sizeof(int));
1384     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1385   } else a->garray = 0;
1386 
1387   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1388   PLogObjectParent(mat,a->lvec);
1389   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1390   PLogObjectParent(mat,a->Mvctx);
1391   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1392   PLogObjectParent(mat,a->A);
1393   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1394   PLogObjectParent(mat,a->B);
1395   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1396   if (flg) {
1397     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1398   }
1399   *newmat = mat;
1400   return 0;
1401 }
1402 
1403 #include "sys.h"
1404 
1405 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1406 {
1407   Mat          A;
1408   int          i, nz, ierr, j,rstart, rend, fd;
1409   Scalar       *vals,*svals;
1410   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1411   MPI_Status   status;
1412   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1413   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1414   int          tag = ((PetscObject)viewer)->tag;
1415 
1416   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1417   if (!rank) {
1418     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1419     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1420     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1421   }
1422 
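  /* the binary file layout read below is, in order: a 4-int header
     (MAT_COOKIE, M, N, and a fourth entry unused here), then the M row
     lengths, then all column indices, then all numerical values; only
     process 0 reads the file and ships each piece to its owner */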
1423   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1424   M = header[1]; N = header[2];
1425   /* determine ownership of all rows */
1426   m = M/size + ((M % size) > rank);
1427   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1428   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1429   rowners[0] = 0;
1430   for ( i=2; i<=size; i++ ) {
1431     rowners[i] += rowners[i-1];
1432   }
1433   rstart = rowners[rank];
1434   rend   = rowners[rank+1];
1435 
1436   /* distribute row lengths to all processors */
1437   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1438   offlens = ourlens + (rend-rstart);
1439   if (!rank) {
1440     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1441     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1442     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1443     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1444     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1445     PetscFree(sndcounts);
1446   }
1447   else {
1448     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1449   }
1450 
1451   if (!rank) {
1452     /* calculate the number of nonzeros on each processor */
1453     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1454     PetscMemzero(procsnz,size*sizeof(int));
1455     for ( i=0; i<size; i++ ) {
1456       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1457         procsnz[i] += rowlengths[j];
1458       }
1459     }
1460     PetscFree(rowlengths);
1461 
1462     /* determine max buffer needed and allocate it */
1463     maxnz = 0;
1464     for ( i=0; i<size; i++ ) {
1465       maxnz = PetscMax(maxnz,procsnz[i]);
1466     }
1467     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1468 
1469     /* read in my part of the matrix column indices  */
1470     nz = procsnz[0];
1471     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1472     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1473 
1474     /* read in everyone else's and ship them off */
1475     for ( i=1; i<size; i++ ) {
1476       nz = procsnz[i];
1477       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1478       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1479     }
1480     PetscFree(cols);
1481   }
1482   else {
1483     /* determine buffer space needed for message */
1484     nz = 0;
1485     for ( i=0; i<m; i++ ) {
1486       nz += ourlens[i];
1487     }
1488     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1489 
1490     /* receive message of column indices*/
1491     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1492     MPI_Get_count(&status,MPI_INT,&maxnz);
1493     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1494   }
1495 
1496   /* loop over local rows, determining number of off-diagonal entries */
1497   PetscMemzero(offlens,m*sizeof(int));
1498   jj = 0;
1499   for ( i=0; i<m; i++ ) {
1500     for ( j=0; j<ourlens[i]; j++ ) {
1501       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1502       jj++;
1503     }
1504   }
1505 
1506   /* create our matrix */
1507   for ( i=0; i<m; i++ ) {
1508     ourlens[i] -= offlens[i];
1509   }
1510   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1511   A = *newmat;
1512   MatSetOption(A,MAT_COLUMNS_SORTED);
1513   for ( i=0; i<m; i++ ) {
1514     ourlens[i] += offlens[i];
1515   }
1516 
1517   if (!rank) {
1518     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1519 
1520     /* read in my part of the matrix numerical values  */
1521     nz = procsnz[0];
1522     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1523 
1524     /* insert into matrix */
1525     jj      = rstart;
1526     smycols = mycols;
1527     svals   = vals;
1528     for ( i=0; i<m; i++ ) {
1529       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1530       smycols += ourlens[i];
1531       svals   += ourlens[i];
1532       jj++;
1533     }
1534 
1535     /* read in the other processors' parts and ship them out */
1536     for ( i=1; i<size; i++ ) {
1537       nz = procsnz[i];
1538       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1539       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1540     }
1541     PetscFree(procsnz);
1542   }
1543   else {
1544     /* receive numeric values */
1545     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1546 
1547     /* receive message of values*/
1548     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1549     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1550     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1551 
1552     /* insert into matrix */
1553     jj      = rstart;
1554     smycols = mycols;
1555     svals   = vals;
1556     for ( i=0; i<m; i++ ) {
1557       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1558       smycols += ourlens[i];
1559       svals   += ourlens[i];
1560       jj++;
1561     }
1562   }
1563   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1564 
1565   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1566   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1567   return 0;
1568 }
1569