xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7fab95ffa0929fef22d7fc43e2547d4da133564f)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.115 1996/01/24 05:46:00 bsmith Exp balay $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* Local utility routine that creates a mapping from the global column
10 number to the local column number in the off-diagonal part (B) of the
11 local storage of the matrix.  This is done in a non-scalable way, since
12 the length of colmap equals the global number of columns.
13 */
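/* Illustrative sketch (hypothetical numbers, assuming indexshift is -1, i.e.
   Fortran-style 1-based internal storage): if garray = {3,8,12}, the routine
   below sets colmap[3] = 1, colmap[8] = 2, colmap[12] = 3 and leaves all other
   entries 0, so a caller computing colmap[c] + shift obtains the local column
   (0, 1 or 2) when global column c is present and a negative value otherwise. */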
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int        ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
43   Scalar     value;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift,roworiented = aij->roworiented;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (im[i] >= rstart && im[i] < rend) {
56       row = im[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (in[j] >= cstart && in[j] < cend){
61           col = in[j] - cstart;
62           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
63           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
64         }
65         else {
66           if (mat->assembled) {
67             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
68             col = aij->colmap[in[j]] + shift;
69             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
70               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
71               col =  in[j];
72             }
73           }
74           else col = in[j];
75           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
76           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
77         }
78       }
79     }
80     else {
81       if (roworiented) {
82         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
83       }
84       else {
85         row = im[i];
86         for ( j=0; j<n; j++ ) {
87           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
88         }
89       }
90     }
91   }
92   return 0;
93 }
94 
95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96 {
97   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100   int        cstart = aij->cstart, cend = aij->cend,row,col;
101   int        shift = C->indexshift;
102 
103   for ( i=0; i<m; i++ ) {
104     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106     if (idxm[i] >= rstart && idxm[i] < rend) {
107       row = idxm[i] - rstart;
108       for ( j=0; j<n; j++ ) {
109         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111         if (idxn[j] >= cstart && idxn[j] < cend){
112           col = idxn[j] - cstart;
113           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114         }
115         else {
116           col = aij->colmap[idxn[j]] + shift;
117           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118         }
119       }
120     }
121     else {
122       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123     }
124   }
125   return 0;
126 }
127 
128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
129 {
130   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131   MPI_Comm    comm = mat->comm;
132   int         size = aij->size, *owners = aij->rowners;
133   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
134   MPI_Request *send_waits,*recv_waits;
135   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
136   InsertMode  addv;
137   Scalar      *rvalues,*svalues;
138 
139   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
140   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
143   }
144   aij->insertmode = addv; /* in case this processor had no cache */
145 
146   /*  first count number of contributors to each processor */
147   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
149   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
150   for ( i=0; i<aij->stash.n; i++ ) {
151     idx = aij->stash.idx[i];
152     for ( j=0; j<size; j++ ) {
153       if (idx >= owners[j] && idx < owners[j+1]) {
154         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
155       }
156     }
157   }
158   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
159 
160   /* inform other processors of number of messages and max length*/
161   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
162   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
163   nreceives = work[rank];
164   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
165   nmax = work[rank];
166   PetscFree(work);
167 
168   /* post receives:
169        1) each message consists of ordered triples
170      (global row index, global column index, value); the indices are stored
171      as Scalars to simplify the message passing.
172        2) since we don't know how long each individual message is, we
173      allocate the largest possibly needed buffer for each receive.
174      Potentially this is a lot of wasted space.
175 
176 
177        This could be done better.
178   */
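  /* For example (hypothetical numbers): a stashed entry bound for another
     processor with global row 7, global column 42 and value 3.5 travels as the
     three consecutive Scalars 7.0, 42.0, 3.5 in the buffers below. */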
179   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
180   CHKPTRQ(rvalues);
181   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
182   CHKPTRQ(recv_waits);
183   for ( i=0; i<nreceives; i++ ) {
184     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
185               comm,recv_waits+i);
186   }
187 
188   /* do sends:
189       1) starts[i] gives the starting index in svalues for stuff going to
190          the ith processor
191   */
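  /* For example (hypothetical numbers): if nprocs = {2,0,3} then the prefix sum
     below gives starts = {0,2,2}, so the two entries bound for processor 0 are
     packed first and the three bound for processor 2 follow them. */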
192   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
193   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
194   CHKPTRQ(send_waits);
195   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
196   starts[0] = 0;
197   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
198   for ( i=0; i<aij->stash.n; i++ ) {
199     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
200     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
201     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
202   }
203   PetscFree(owner);
204   starts[0] = 0;
205   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
206   count = 0;
207   for ( i=0; i<size; i++ ) {
208     if (procs[i]) {
209       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
210                 comm,send_waits+count++);
211     }
212   }
213   PetscFree(starts); PetscFree(nprocs);
214 
215   /* Free cache space */
216   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
217 
218   aij->svalues    = svalues;    aij->rvalues    = rvalues;
219   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
220   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
221   aij->rmax       = nmax;
222 
223   return 0;
224 }
225 extern int MatSetUpMultiply_MPIAIJ(Mat);
226 
227 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
228 {
229   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
230   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
231   MPI_Status  *send_status,recv_status;
232   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
233   int         row,col,other_disassembled,shift = C->indexshift;
234   Scalar      *values,val;
235   InsertMode  addv = aij->insertmode;
236 
237   /*  wait on receives */
238   while (count) {
239     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
240     /* unpack receives into our local space */
241     values = aij->rvalues + 3*imdex*aij->rmax;
242     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
243     n = n/3;
244     for ( i=0; i<n; i++ ) {
245       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
246       col = (int) PETSCREAL(values[3*i+1]);
247       val = values[3*i+2];
248       if (col >= aij->cstart && col < aij->cend) {
249         col -= aij->cstart;
250         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
251       }
252       else {
253         if (mat->assembled) {
254           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
255           col = aij->colmap[col] + shift;
256           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
257             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
258             col = (int) PETSCREAL(values[3*i+1]);
259           }
260         }
261         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
262       }
263     }
264     count--;
265   }
266   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
267 
268   /* wait on sends */
269   if (aij->nsends) {
270     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
271     CHKPTRQ(send_status);
272     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
273     PetscFree(send_status);
274   }
275   PetscFree(aij->send_waits); PetscFree(aij->svalues);
276 
277   aij->insertmode = NOT_SET_VALUES;
278   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
279   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
280 
281   /* determine if any processor has disassembled; if so, we must
282      also disassemble ourselves, in order that we may reassemble. */
283   MPI_Allreduce(&mat->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
284   if (mat->assembled && !other_disassembled) {
285     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
286   }
287 
288   if (!mat->assembled && mode == FINAL_ASSEMBLY) {
289     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
290   }
291   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
292   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
293 
294   return 0;
295 }
296 
297 static int MatZeroEntries_MPIAIJ(Mat A)
298 {
299   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
300   int        ierr;
301   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
302   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
303   return 0;
304 }
305 
306 /* the code does not do the diagonal entries correctly unless the
307    matrix is square and the column and row ownerships are identical.
308    This is a BUG. The only way to fix it seems to be to access
309    aij->A and aij->B directly and not through the MatZeroRows()
310    routine.
311 */
312 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
313 {
314   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
315   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
316   int            *procs,*nprocs,j,found,idx,nsends,*work;
317   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
318   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
319   int            *lens,imdex,*lrows,*values;
320   MPI_Comm       comm = A->comm;
321   MPI_Request    *send_waits,*recv_waits;
322   MPI_Status     recv_status,*send_status;
323   IS             istmp;
324 
325   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
326   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
327 
328   /*  first count number of contributors to each processor */
329   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
330   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
331   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
332   for ( i=0; i<N; i++ ) {
333     idx = rows[i];
334     found = 0;
335     for ( j=0; j<size; j++ ) {
336       if (idx >= owners[j] && idx < owners[j+1]) {
337         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
338       }
339     }
340     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
341   }
342   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
343 
344   /* inform other processors of number of messages and max length*/
345   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
346   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
347   nrecvs = work[rank];
348   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
349   nmax = work[rank];
350   PetscFree(work);
351 
352   /* post receives:   */
353   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
354   CHKPTRQ(rvalues);
355   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
356   CHKPTRQ(recv_waits);
357   for ( i=0; i<nrecvs; i++ ) {
358     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
359   }
360 
361   /* do sends:
362       1) starts[i] gives the starting index in svalues for stuff going to
363          the ith processor
364   */
365   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
366   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
367   CHKPTRQ(send_waits);
368   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
369   starts[0] = 0;
370   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
371   for ( i=0; i<N; i++ ) {
372     svalues[starts[owner[i]]++] = rows[i];
373   }
374   ISRestoreIndices(is,&rows);
375 
376   starts[0] = 0;
377   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   count = 0;
379   for ( i=0; i<size; i++ ) {
380     if (procs[i]) {
381       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
382     }
383   }
384   PetscFree(starts);
385 
386   base = owners[rank];
387 
388   /*  wait on receives */
389   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
390   source = lens + nrecvs;
391   count  = nrecvs; slen = 0;
392   while (count) {
393     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
394     /* unpack receives into our local space */
395     MPI_Get_count(&recv_status,MPI_INT,&n);
396     source[imdex]  = recv_status.MPI_SOURCE;
397     lens[imdex]  = n;
398     slen += n;
399     count--;
400   }
401   PetscFree(recv_waits);
402 
403   /* move the data into the send scatter */
404   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
405   count = 0;
406   for ( i=0; i<nrecvs; i++ ) {
407     values = rvalues + i*nmax;
408     for ( j=0; j<lens[i]; j++ ) {
409       lrows[count++] = values[j] - base;
410     }
411   }
412   PetscFree(rvalues); PetscFree(lens);
413   PetscFree(owner); PetscFree(nprocs);
414 
415   /* actually zap the local rows */
416   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
417   PLogObjectParent(A,istmp);
418   PetscFree(lrows);
419   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
420   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
421   ierr = ISDestroy(istmp); CHKERRQ(ierr);
422 
423   /* wait on sends */
424   if (nsends) {
425     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
426     CHKPTRQ(send_status);
427     MPI_Waitall(nsends,send_waits,send_status);
428     PetscFree(send_status);
429   }
430   PetscFree(send_waits); PetscFree(svalues);
431 
432   return 0;
433 }
434 
435 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
436 {
437   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
438   int        ierr;
439 
440   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
441   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
442   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
443   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
444   return 0;
445 }
446 
447 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
448 {
449   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
450   int        ierr;
451   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
452   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
454   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
455   return 0;
456 }
457 
458 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
459 {
460   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
461   int        ierr;
462 
463   /* do nondiagonal part */
464   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
465   /* send it on its way */
466   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
467                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
468   /* do local part */
469   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
470   /* receive remote parts: note this assumes the values are not actually */
471   /* inserted into yy until the next line, which is true for my implementation */
472   /* but may not always be true. */
473   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
474                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
475   return 0;
476 }
477 
478 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
479 {
480   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
481   int        ierr;
482 
483   /* do nondiagonal part */
484   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
485   /* send it on its way */
486   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
487                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
488   /* do local part */
489   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
490   /* receive remote parts: note this assumes the values are not actually */
491   /* inserted into zz until the next line, which is true for my implementation */
492   /* but may not always be true. */
493   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
494                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495   return 0;
496 }
497 
498 /*
499   This only works correctly for square matrices where the subblock A->A is the
500    diagonal block
501 */
502 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
503 {
504   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
505   return MatGetDiagonal(a->A,v);
506 }
507 
508 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
509 {
510   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
511   int        ierr;
512   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
513   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
514   return 0;
515 }
516 
517 static int MatDestroy_MPIAIJ(PetscObject obj)
518 {
519   Mat        mat = (Mat) obj;
520   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
521   int        ierr;
522 #if defined(PETSC_LOG)
523   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
524 #endif
525   PetscFree(aij->rowners);
526   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
527   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
528   if (aij->colmap) PetscFree(aij->colmap);
529   if (aij->garray) PetscFree(aij->garray);
530   if (aij->lvec)   VecDestroy(aij->lvec);
531   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
532   PetscFree(aij);
533   PLogObjectDestroy(mat);
534   PetscHeaderDestroy(mat);
535   return 0;
536 }
537 #include "draw.h"
538 #include "pinclude/pviewer.h"
539 
540 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
541 {
542   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
543   int         ierr;
544 
545   if (aij->size == 1) {
546     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
547   }
548   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
549   return 0;
550 }
551 
552 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
553 {
554   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
555   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
556   int         ierr, format,shift = C->indexshift,rank;
557   PetscObject vobj = (PetscObject) viewer;
558   FILE        *fd;
559 
560   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
561     ierr = ViewerFileGetFormat_Private(viewer,&format);
562     if (format == FILE_FORMAT_INFO_DETAILED) {
563       int nz,nzalloc,mem;
564       MPI_Comm_rank(mat->comm,&rank);
565       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
566       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
567       MPIU_Seq_begin(mat->comm,1);
568       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
569                 nzalloc,mem);
570       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
571       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
572       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
573       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
574       fflush(fd);
575       MPIU_Seq_end(mat->comm,1);
576       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
577       return 0;
578     }
579     else if (format == FILE_FORMAT_INFO) {
580       return 0;
581     }
582   }
583 
584   if (vobj->type == ASCII_FILE_VIEWER) {
585     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
586     MPIU_Seq_begin(mat->comm,1);
587     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
588            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
589            aij->cend);
590     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
591     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
592     fflush(fd);
593     MPIU_Seq_end(mat->comm,1);
594   }
595   else {
596     int size = aij->size;
597     rank = aij->rank;
598     if (size == 1) {
599       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
600     }
601     else {
602       /* assemble the entire matrix onto first processor. */
603       Mat         A;
604       Mat_SeqAIJ *Aloc;
605       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
606       Scalar      *a;
607 
608       if (!rank) {
609         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
610                CHKERRQ(ierr);
611       }
612       else {
613         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
614                CHKERRQ(ierr);
615       }
616       PLogObjectParent(mat,A);
617 
618       /* copy over the A part */
619       Aloc = (Mat_SeqAIJ*) aij->A->data;
620       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
621       row = aij->rstart;
622       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
623       for ( i=0; i<m; i++ ) {
624         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
625         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
626       }
627       aj = Aloc->j;
628       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
629 
630       /* copy over the B part */
631       Aloc = (Mat_SeqAIJ*) aij->B->data;
632       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
633       row = aij->rstart;
634       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
635       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
636       for ( i=0; i<m; i++ ) {
637         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
638         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
639       }
640       PetscFree(ct);
641       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
642       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
643       if (!rank) {
644         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
645       }
646       ierr = MatDestroy(A); CHKERRQ(ierr);
647     }
648   }
649   return 0;
650 }
651 
652 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
653 {
654   Mat         mat = (Mat) obj;
655   int         ierr;
656   PetscObject vobj = (PetscObject) viewer;
657 
658   if (!viewer) {
659     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
660   }
661   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
662   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
663     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
664   }
665   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
666     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
667   }
668   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
669     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
670   }
671   else if (vobj->cookie == DRAW_COOKIE) {
672     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
673   }
674   else if (vobj->type == BINARY_FILE_VIEWER) {
675     return MatView_MPIAIJ_Binary(mat,viewer);
676   }
677   return 0;
678 }
679 
680 extern int MatMarkDiag_SeqAIJ(Mat);
681 /*
682     This has to provide several versions.
683 
684      1) per sequential
685      2) a) use only local smoothing updating outer values only once.
686         b) local smoothing updating outer values each inner iteration
687      3) color updating out values betwen colors.
688 */
689 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
690                            double fshift,int its,Vec xx)
691 {
692   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
693   Mat        AA = mat->A, BB = mat->B;
694   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
695   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
696   int        ierr,*idx, *diag;
697   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
698   Vec        tt;
699 
700   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
701   xs = x + shift; /* shift by one for index start of 1 */
702   ls = ls + shift;
703   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
704   diag = A->diag;
705   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
706     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
707   }
708   if (flag & SOR_EISENSTAT) {
709     /* Let  A = L + D + U, where L is strictly lower triangular,
710     U is strictly upper triangular, and D is diagonal. This routine applies
711 
712             (L + E)^{-1} A (U + E)^{-1}
713 
714     to a vector efficiently using Eisenstat's trick. This is for
715     the case of the SSOR preconditioner, so E is D/omega where omega
716     is the relaxation factor.
717     */
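    /* Sketch of the algebra behind the four steps below, using
       A = (L + E) + (U + E) - (2E - D):
           (L + E)^{-1} A (U + E)^{-1} b
              = (U + E)^{-1} b + (L + E)^{-1} ( b - (2E - D) (U + E)^{-1} b ),
       i.e.  x = (E + U)^{-1} b;  t = b - (2E - D) x;  t = (E + L)^{-1} t;  x = x + t.
    */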
718     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
719     PLogObjectParent(matin,tt);
720     VecGetArray(tt,&t);
721     scale = (2.0/omega) - 1.0;
722     /*  x = (E + U)^{-1} b */
723     VecSet(&zero,mat->lvec);
724     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
725                               mat->Mvctx); CHKERRQ(ierr);
726     for ( i=m-1; i>-1; i-- ) {
727       n    = A->i[i+1] - diag[i] - 1;
728       idx  = A->j + diag[i] + !shift;
729       v    = A->a + diag[i] + !shift;
730       sum  = b[i];
731       SPARSEDENSEMDOT(sum,xs,v,idx,n);
732       d    = fshift + A->a[diag[i]+shift];
733       n    = B->i[i+1] - B->i[i];
734       idx  = B->j + B->i[i] + shift;
735       v    = B->a + B->i[i] + shift;
736       SPARSEDENSEMDOT(sum,ls,v,idx,n);
737       x[i] = omega*(sum/d);
738     }
739     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
740                             mat->Mvctx); CHKERRQ(ierr);
741 
742     /*  t = b - (2*E - D)x */
743     v = A->a;
744     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
745 
746     /*  t = (E + L)^{-1}t */
747     ts = t + shift; /* shifted by one for index start of a or mat->j*/
748     diag = A->diag;
749     VecSet(&zero,mat->lvec);
750     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
751                                                  mat->Mvctx); CHKERRQ(ierr);
752     for ( i=0; i<m; i++ ) {
753       n    = diag[i] - A->i[i];
754       idx  = A->j + A->i[i] + shift;
755       v    = A->a + A->i[i] + shift;
756       sum  = t[i];
757       SPARSEDENSEMDOT(sum,ts,v,idx,n);
758       d    = fshift + A->a[diag[i]+shift];
759       n    = B->i[i+1] - B->i[i];
760       idx  = B->j + B->i[i] + shift;
761       v    = B->a + B->i[i] + shift;
762       SPARSEDENSEMDOT(sum,ls,v,idx,n);
763       t[i] = omega*(sum/d);
764     }
765     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
766                                                     mat->Mvctx); CHKERRQ(ierr);
767     /*  x = x + t */
768     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
769     VecDestroy(tt);
770     return 0;
771   }
772 
773 
774   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
775     if (flag & SOR_ZERO_INITIAL_GUESS) {
776       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
777     }
778     else {
779       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
780       CHKERRQ(ierr);
781       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
782       CHKERRQ(ierr);
783     }
784     while (its--) {
785       /* go down through the rows */
786       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
787                               mat->Mvctx); CHKERRQ(ierr);
788       for ( i=0; i<m; i++ ) {
789         n    = A->i[i+1] - A->i[i];
790         idx  = A->j + A->i[i] + shift;
791         v    = A->a + A->i[i] + shift;
792         sum  = b[i];
793         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794         d    = fshift + A->a[diag[i]+shift];
795         n    = B->i[i+1] - B->i[i];
796         idx  = B->j + B->i[i] + shift;
797         v    = B->a + B->i[i] + shift;
798         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
800       }
801       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
802                             mat->Mvctx); CHKERRQ(ierr);
803       /* come up through the rows */
804       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
805                               mat->Mvctx); CHKERRQ(ierr);
806       for ( i=m-1; i>-1; i-- ) {
807         n    = A->i[i+1] - A->i[i];
808         idx  = A->j + A->i[i] + shift;
809         v    = A->a + A->i[i] + shift;
810         sum  = b[i];
811         SPARSEDENSEMDOT(sum,xs,v,idx,n);
812         d    = fshift + A->a[diag[i]+shift];
813         n    = B->i[i+1] - B->i[i];
814         idx  = B->j + B->i[i] + shift;
815         v    = B->a + B->i[i] + shift;
816         SPARSEDENSEMDOT(sum,ls,v,idx,n);
817         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
818       }
819       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
820                             mat->Mvctx); CHKERRQ(ierr);
821     }
822   }
823   else if (flag & SOR_FORWARD_SWEEP){
824     if (flag & SOR_ZERO_INITIAL_GUESS) {
825       VecSet(&zero,mat->lvec);
826       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
827                               mat->Mvctx); CHKERRQ(ierr);
828       for ( i=0; i<m; i++ ) {
829         n    = diag[i] - A->i[i];
830         idx  = A->j + A->i[i] + shift;
831         v    = A->a + A->i[i] + shift;
832         sum  = b[i];
833         SPARSEDENSEMDOT(sum,xs,v,idx,n);
834         d    = fshift + A->a[diag[i]+shift];
835         n    = B->i[i+1] - B->i[i];
836         idx  = B->j + B->i[i] + shift;
837         v    = B->a + B->i[i] + shift;
838         SPARSEDENSEMDOT(sum,ls,v,idx,n);
839         x[i] = omega*(sum/d);
840       }
841       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
842                             mat->Mvctx); CHKERRQ(ierr);
843       its--;
844     }
845     while (its--) {
846       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
847       CHKERRQ(ierr);
848       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
849       CHKERRQ(ierr);
850       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
851                               mat->Mvctx); CHKERRQ(ierr);
852       for ( i=0; i<m; i++ ) {
853         n    = A->i[i+1] - A->i[i];
854         idx  = A->j + A->i[i] + shift;
855         v    = A->a + A->i[i] + shift;
856         sum  = b[i];
857         SPARSEDENSEMDOT(sum,xs,v,idx,n);
858         d    = fshift + A->a[diag[i]+shift];
859         n    = B->i[i+1] - B->i[i];
860         idx  = B->j + B->i[i] + shift;
861         v    = B->a + B->i[i] + shift;
862         SPARSEDENSEMDOT(sum,ls,v,idx,n);
863         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
864       }
865       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
866                             mat->Mvctx); CHKERRQ(ierr);
867     }
868   }
869   else if (flag & SOR_BACKWARD_SWEEP){
870     if (flag & SOR_ZERO_INITIAL_GUESS) {
871       VecSet(&zero,mat->lvec);
872       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
873                               mat->Mvctx); CHKERRQ(ierr);
874       for ( i=m-1; i>-1; i-- ) {
875         n    = A->i[i+1] - diag[i] - 1;
876         idx  = A->j + diag[i] + !shift;
877         v    = A->a + diag[i] + !shift;
878         sum  = b[i];
879         SPARSEDENSEMDOT(sum,xs,v,idx,n);
880         d    = fshift + A->a[diag[i]+shift];
881         n    = B->i[i+1] - B->i[i];
882         idx  = B->j + B->i[i] + shift;
883         v    = B->a + B->i[i] + shift;
884         SPARSEDENSEMDOT(sum,ls,v,idx,n);
885         x[i] = omega*(sum/d);
886       }
887       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
888                             mat->Mvctx); CHKERRQ(ierr);
889       its--;
890     }
891     while (its--) {
892       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
893                             mat->Mvctx); CHKERRQ(ierr);
894       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
895                             mat->Mvctx); CHKERRQ(ierr);
896       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
897                               mat->Mvctx); CHKERRQ(ierr);
898       for ( i=m-1; i>-1; i-- ) {
899         n    = A->i[i+1] - A->i[i];
900         idx  = A->j + A->i[i] + shift;
901         v    = A->a + A->i[i] + shift;
902         sum  = b[i];
903         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904         d    = fshift + A->a[diag[i]+shift];
905         n    = B->i[i+1] - B->i[i];
906         idx  = B->j + B->i[i] + shift;
907         v    = B->a + B->i[i] + shift;
908         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
910       }
911       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
912                             mat->Mvctx); CHKERRQ(ierr);
913     }
914   }
915   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
916     if (flag & SOR_ZERO_INITIAL_GUESS) {
917       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
918     }
919     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
920     CHKERRQ(ierr);
921     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
922     CHKERRQ(ierr);
923     while (its--) {
924       /* go down through the rows */
925       for ( i=0; i<m; i++ ) {
926         n    = A->i[i+1] - A->i[i];
927         idx  = A->j + A->i[i] + shift;
928         v    = A->a + A->i[i] + shift;
929         sum  = b[i];
930         SPARSEDENSEMDOT(sum,xs,v,idx,n);
931         d    = fshift + A->a[diag[i]+shift];
932         n    = B->i[i+1] - B->i[i];
933         idx  = B->j + B->i[i] + shift;
934         v    = B->a + B->i[i] + shift;
935         SPARSEDENSEMDOT(sum,ls,v,idx,n);
936         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
937       }
938       /* come up through the rows */
939       for ( i=m-1; i>-1; i-- ) {
940         n    = A->i[i+1] - A->i[i];
941         idx  = A->j + A->i[i] + shift;
942         v    = A->a + A->i[i] + shift;
943         sum  = b[i];
944         SPARSEDENSEMDOT(sum,xs,v,idx,n);
945         d    = fshift + A->a[diag[i]+shift];
946         n    = B->i[i+1] - B->i[i];
947         idx  = B->j + B->i[i] + shift;
948         v    = B->a + B->i[i] + shift;
949         SPARSEDENSEMDOT(sum,ls,v,idx,n);
950         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
951       }
952     }
953   }
954   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
955     if (flag & SOR_ZERO_INITIAL_GUESS) {
956       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
957     }
958     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
959     CHKERRQ(ierr);
960     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
961     CHKERRQ(ierr);
962     while (its--) {
963       for ( i=0; i<m; i++ ) {
964         n    = A->i[i+1] - A->i[i];
965         idx  = A->j + A->i[i] + shift;
966         v    = A->a + A->i[i] + shift;
967         sum  = b[i];
968         SPARSEDENSEMDOT(sum,xs,v,idx,n);
969         d    = fshift + A->a[diag[i]+shift];
970         n    = B->i[i+1] - B->i[i];
971         idx  = B->j + B->i[i] + shift;
972         v    = B->a + B->i[i] + shift;
973         SPARSEDENSEMDOT(sum,ls,v,idx,n);
974         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
975       }
976     }
977   }
978   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
979     if (flag & SOR_ZERO_INITIAL_GUESS) {
980       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
981     }
982     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
983                             mat->Mvctx); CHKERRQ(ierr);
984     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
985                             mat->Mvctx); CHKERRQ(ierr);
986     while (its--) {
987       for ( i=m-1; i>-1; i-- ) {
988         n    = A->i[i+1] - A->i[i];
989         idx  = A->j + A->i[i] + shift;
990         v    = A->a + A->i[i] + shift;
991         sum  = b[i];
992         SPARSEDENSEMDOT(sum,xs,v,idx,n);
993         d    = fshift + A->a[diag[i]+shift];
994         n    = B->i[i+1] - B->i[i];
995         idx  = B->j + B->i[i] + shift;
996         v    = B->a + B->i[i] + shift;
997         SPARSEDENSEMDOT(sum,ls,v,idx,n);
998         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
999       }
1000     }
1001   }
1002   return 0;
1003 }
1004 
1005 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1006                              int *nzalloc,int *mem)
1007 {
1008   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1009   Mat        A = mat->A, B = mat->B;
1010   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1011 
1012   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1013   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1014   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1015   if (flag == MAT_LOCAL) {
1016     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1017   } else if (flag == MAT_GLOBAL_MAX) {
1018     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1019     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1020   } else if (flag == MAT_GLOBAL_SUM) {
1021     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1022     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1023   }
1024   return 0;
1025 }
1026 
1027 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1028 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1029 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1030 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1031 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1032 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1033 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1034 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1035 
1036 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1037 {
1038   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1039 
1040   if (op == NO_NEW_NONZERO_LOCATIONS ||
1041       op == YES_NEW_NONZERO_LOCATIONS ||
1042       op == COLUMNS_SORTED ||
1043       op == ROW_ORIENTED) {
1044         MatSetOption(a->A,op);
1045         MatSetOption(a->B,op);
1046   }
1047   else if (op == ROWS_SORTED ||
1048            op == SYMMETRIC_MATRIX ||
1049            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1050            op == YES_NEW_DIAGONALS)
1051     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1052   else if (op == COLUMN_ORIENTED) {
1053     a->roworiented = 0;
1054     MatSetOption(a->A,op);
1055     MatSetOption(a->B,op);
1056   }
1057   else if (op == NO_NEW_DIAGONALS)
1058     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1059   else
1060     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1061   return 0;
1062 }
1063 
1064 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1065 {
1066   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1067   *m = mat->M; *n = mat->N;
1068   return 0;
1069 }
1070 
1071 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1072 {
1073   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1074   *m = mat->m; *n = mat->N;
1075   return 0;
1076 }
1077 
1078 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1079 {
1080   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1081   *m = mat->rstart; *n = mat->rend;
1082   return 0;
1083 }
1084 
1085 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1086 {
1087   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1088   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1089   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1090   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1091 
1092   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
1093   lrow = row - rstart;
1094 
1095   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1096   if (!v)   {pvA = 0; pvB = 0;}
1097   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1098   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1099   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1100   nztot = nzA + nzB;
1101 
1102   if (v  || idx) {
1103     if (nztot) {
1104       /* Sort by increasing column numbers, assuming A and B already sorted */
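      /* e.g. (hypothetical): with cstart = 10, B contributing global columns
         {2,5,12} and A contributing {10,11}, the merged row is {2,5,10,11,12};
         imark = 2 counts the B entries whose columns precede the diagonal block. */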
1105       int imark;
1106       if (matin->assembled) {
1107         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1108       }
1109       if (v) {
1110         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1111         for ( i=0; i<nzB; i++ ) {
1112           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1113           else break;
1114         }
1115         imark = i;
1116         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1117         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1118       }
1119       if (idx) {
1120         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1121         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1122         for ( i=0; i<nzB; i++ ) {
1123           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1124           else break;
1125         }
1126         imark = i;
1127         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1128         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1129       }
1130     }
1131     else {if (idx) *idx = 0; if (v) *v = 0;}
1132   }
1133   *nz = nztot;
1134   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1135   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1136   return 0;
1137 }
1138 
1139 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1140 {
1141   if (idx) PetscFree(*idx);
1142   if (v) PetscFree(*v);
1143   return 0;
1144 }
1145 
1146 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1147 {
1148   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1149   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1150   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1151   double     sum = 0.0;
1152   Scalar     *v;
1153 
1154   if (aij->size == 1) {
1155     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1156   } else {
1157     if (type == NORM_FROBENIUS) {
1158       v = amat->a;
1159       for (i=0; i<amat->nz; i++ ) {
1160 #if defined(PETSC_COMPLEX)
1161         sum += real(conj(*v)*(*v)); v++;
1162 #else
1163         sum += (*v)*(*v); v++;
1164 #endif
1165       }
1166       v = bmat->a;
1167       for (i=0; i<bmat->nz; i++ ) {
1168 #if defined(PETSC_COMPLEX)
1169         sum += real(conj(*v)*(*v)); v++;
1170 #else
1171         sum += (*v)*(*v); v++;
1172 #endif
1173       }
1174       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1175       *norm = sqrt(*norm);
1176     }
1177     else if (type == NORM_1) { /* max column norm */
1178       double *tmp, *tmp2;
1179       int    *jj, *garray = aij->garray;
1180       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1181       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1182       PetscMemzero(tmp,aij->N*sizeof(double));
1183       *norm = 0.0;
1184       v = amat->a; jj = amat->j;
1185       for ( j=0; j<amat->nz; j++ ) {
1186         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1187       }
1188       v = bmat->a; jj = bmat->j;
1189       for ( j=0; j<bmat->nz; j++ ) {
1190         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1191       }
1192       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1193       for ( j=0; j<aij->N; j++ ) {
1194         if (tmp2[j] > *norm) *norm = tmp2[j];
1195       }
1196       PetscFree(tmp); PetscFree(tmp2);
1197     }
1198     else if (type == NORM_INFINITY) { /* max row norm */
1199       double ntemp = 0.0;
1200       for ( j=0; j<amat->m; j++ ) {
1201         v = amat->a + amat->i[j] + shift;
1202         sum = 0.0;
1203         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1204           sum += PetscAbsScalar(*v); v++;
1205         }
1206         v = bmat->a + bmat->i[j] + shift;
1207         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1208           sum += PetscAbsScalar(*v); v++;
1209         }
1210         if (sum > ntemp) ntemp = sum;
1211       }
1212       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1213     }
1214     else {
1215       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1216     }
1217   }
1218   return 0;
1219 }
1220 
1221 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1222 {
1223   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1224   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1225   int        ierr,shift = Aloc->indexshift;
1226   Mat        B;
1227   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1228   Scalar     *array;
1229 
1230   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1231   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1232          PETSC_NULL,&B); CHKERRQ(ierr);
1233 
1234   /* copy over the A part */
1235   Aloc = (Mat_SeqAIJ*) a->A->data;
1236   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1237   row = a->rstart;
1238   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1239   for ( i=0; i<m; i++ ) {
1240     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1241     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1242   }
1243   aj = Aloc->j;
1244   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1245 
1246   /* copy over the B part */
1247   Aloc = (Mat_SeqAIJ*) a->B->data;
1248   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249   row = a->rstart;
1250   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1251   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1252   for ( i=0; i<m; i++ ) {
1253     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1254     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1255   }
1256   PetscFree(ct);
1257   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1258   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1259   if (matout) {
1260     *matout = B;
1261   } else {
1262     /* This isn't really an in-place transpose, but we free the data structures from a */
1263     PetscFree(a->rowners);
1264     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1265     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1266     if (a->colmap) PetscFree(a->colmap);
1267     if (a->garray) PetscFree(a->garray);
1268     if (a->lvec) VecDestroy(a->lvec);
1269     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1270     PetscFree(a);
1271     PetscMemcpy(A,B,sizeof(struct _Mat));
1272     PetscHeaderDestroy(B);
1273   }
1274   return 0;
1275 }
1276 
1277 extern int MatPrintHelp_SeqAIJ(Mat);
1278 static int MatPrintHelp_MPIAIJ(Mat A)
1279 {
1280   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281 
1282   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1283   else return 0;
1284 }
1285 
1286 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1287 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1288 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1289 /* -------------------------------------------------------------------*/
1290 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1291        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1292        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1293        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1294        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1295        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1296        MatLUFactor_MPIAIJ,0,
1297        MatRelax_MPIAIJ,
1298        MatTranspose_MPIAIJ,
1299        MatGetInfo_MPIAIJ,0,
1300        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1301        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1302        0,
1303        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1304        MatGetReordering_MPIAIJ,
1305        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1306        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1307        MatILUFactorSymbolic_MPIAIJ,0,
1308        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1309        0,0,0,
1310        0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1311        MatPrintHelp_MPIAIJ,
1312        MatScale_MPIAIJ};
1313 
1314 /*@C
1315    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1316    (the default parallel PETSc format).
1317 
1318    Input Parameters:
1319 .  comm - MPI communicator
1320 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1321 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1322            if N is given)
1323 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1324 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1325            if n is given)
1326 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1327            (same for all local rows)
1328 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1329            or null (possibly different for each row).  You must leave room
1330            for the diagonal entry even if it is zero.
1331 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1332            submatrix (same for all local rows).
1333 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1334            submatrix or null (possibly different for each row).
1335 
1336    Output Parameter:
1337 .  newmat - the matrix
1338 
1339    Notes:
1340    The AIJ format (also called the Yale sparse matrix format or
1341    compressed row storage) is fully compatible with standard Fortran 77
1342    storage.  That is, the stored row and column indices can begin at
1343    either one (as in Fortran) or zero.  See the users manual for details.
1344 
1345    The user MUST specify either the local or global matrix dimensions
1346    (possibly both).
1347 
1348    Storage Information:
1349    For a square global matrix we define each processor's diagonal portion
1350    to be its local rows and the corresponding columns (a square submatrix);
1351    each processor's off-diagonal portion encompasses the remainder of the
1352    local matrix (a rectangular submatrix).
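
   For example (an illustrative sketch): with two processors each owning 3 rows
   of a 6 x 6 matrix, processor 0's diagonal portion is its 3 x 3 block of
   global columns 0-2 (0-based numbering) and its off-diagonal portion is its
   3 x 3 block of columns 3-5; d_nz/d_nnz preallocate the former and o_nz/o_nnz
   the latter.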
1353 
1354    The user can specify preallocated storage for the diagonal part of
1355    the local submatrix with either d_nz or d_nnz (not both).  Set
1356    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1357    memory allocation.  Likewise, specify preallocated storage for the
1358    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1359 
1360    By default, this format uses inodes (identical nodes) when possible.
1361    We search for consecutive rows with the same nonzero structure, thereby
1362    reusing matrix information to achieve increased efficiency.
1363 
1364    Options Database Keys:
1365 $    -mat_aij_no_inode  - Do not use inodes
1366 $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1367 
1368 .keywords: matrix, aij, compressed row, sparse, parallel
1369 
1370 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1371 @*/
1372 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1373                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
1374 {
1375   Mat          mat;
1376   Mat_MPIAIJ   *a;
1377   int          ierr, i,sum[2],work[2];
1378 
1379   *newmat         = 0;
1380   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1381   PLogObjectCreate(mat);
1382   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1383   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1384   mat->destroy    = MatDestroy_MPIAIJ;
1385   mat->view       = MatView_MPIAIJ;
1386   mat->factor     = 0;
1387   mat->assembled  = PETSC_FALSE;
1388 
1389   a->insertmode = NOT_SET_VALUES;
1390   MPI_Comm_rank(comm,&a->rank);
1391   MPI_Comm_size(comm,&a->size);
1392 
1393   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1394     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1395 
1396   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1397     work[0] = m; work[1] = n;
1398     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1399     if (M == PETSC_DECIDE) M = sum[0];
1400     if (N == PETSC_DECIDE) N = sum[1];
1401   }
1402   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1403   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1404   a->m = m;
1405   a->n = n;
1406   a->N = N;
1407   a->M = M;
1408 
1409   /* build local table of row and column ownerships */
1410   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1411   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1412   a->cowners = a->rowners + a->size + 1;
1413   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1414   a->rowners[0] = 0;
1415   for ( i=2; i<=a->size; i++ ) {
1416     a->rowners[i] += a->rowners[i-1];
1417   }
1418   a->rstart = a->rowners[a->rank];
1419   a->rend   = a->rowners[a->rank+1];
1420   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1421   a->cowners[0] = 0;
1422   for ( i=2; i<=a->size; i++ ) {
1423     a->cowners[i] += a->cowners[i-1];
1424   }
1425   a->cstart = a->cowners[a->rank];
1426   a->cend   = a->cowners[a->rank+1];
1427 
1428   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1429   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1430   PLogObjectParent(mat,a->A);
1431   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1432   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1433   PLogObjectParent(mat,a->B);
1434 
1435   /* build cache (stash) for matrix entries destined for rows owned by other processors */
1436   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1437   a->colmap      = 0;
1438   a->garray      = 0;
1439   a->roworiented = 1;
1440 
1441   /* data structures used for the matrix-vector multiply */
1442   a->lvec      = 0;
1443   a->Mvctx     = 0;
1444 
1445   *newmat = mat;
1446   return 0;
1447 }
1448 
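/*
   Added descriptive comment: MatConvertSameType_MPIAIJ duplicates an MPIAIJ
   matrix.  The parallel layout (sizes, ownership ranges, colmap, garray) is
   copied directly, and the local diagonal (A) and off-diagonal (B) blocks
   are duplicated with MatConvert(...,MATSAME,...).
*/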
1449 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1450 {
1451   Mat        mat;
1452   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1453   int        ierr, len,flg;
1454 
1455   *newmat       = 0;
1456   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1457   PLogObjectCreate(mat);
1458   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1459   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1460   mat->destroy    = MatDestroy_MPIAIJ;
1461   mat->view       = MatView_MPIAIJ;
1462   mat->factor     = matin->factor;
1463   mat->assembled  = PETSC_TRUE;
1464 
1465   a->m          = oldmat->m;
1466   a->n          = oldmat->n;
1467   a->M          = oldmat->M;
1468   a->N          = oldmat->N;
1469 
1470   a->rstart     = oldmat->rstart;
1471   a->rend       = oldmat->rend;
1472   a->cstart     = oldmat->cstart;
1473   a->cend       = oldmat->cend;
1474   a->size       = oldmat->size;
1475   a->rank       = oldmat->rank;
1476   a->insertmode = NOT_SET_VALUES;
1477 
1478   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1479   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1480   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1481   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1482   if (oldmat->colmap) {
1483     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1484     PLogObjectMemory(mat,(a->N)*sizeof(int));
1485     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1486   } else a->colmap = 0;
1487   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1488     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1489     PLogObjectMemory(mat,len*sizeof(int));
1490     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1491   } else a->garray = 0;
1492 
1493   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1494   PLogObjectParent(mat,a->lvec);
1495   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1496   PLogObjectParent(mat,a->Mvctx);
1497   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1498   PLogObjectParent(mat,a->A);
1499   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1500   PLogObjectParent(mat,a->B);
1501   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1502   if (flg) {
1503     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1504   }
1505   *newmat = mat;
1506   return 0;
1507 }
1508 
1509 #include "sysio.h"
1510 
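/*
   Added descriptive comment: the binary file layout assumed by the loader
   below (inferred from the SYRead() calls in this routine) is an integer
   header {MAT_COOKIE, M, N, header[3]}, where header[3] is read but not
   used here, followed by the M row lengths, then all column indices, and
   finally all numerical values.  Processor 0 reads the file and ships each
   processor its portion.
*/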
1511 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1512 {
1513   Mat          A;
1514   int          i, nz, ierr, j,rstart, rend, fd;
1515   Scalar       *vals,*svals;
1516   PetscObject  vobj = (PetscObject) bview;
1517   MPI_Comm     comm = vobj->comm;
1518   MPI_Status   status;
1519   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1520   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1521   int          tag = ((PetscObject)bview)->tag;
1522 
1523   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1524   if (!rank) {
1525     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1526     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1527     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1528   }
1529 
1530   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1531   M = header[1]; N = header[2];
1532   /* determine ownership of all rows */
1533   m = M/size + ((M % size) > rank);
1534   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1535   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1536   rowners[0] = 0;
1537   for ( i=2; i<=size; i++ ) {
1538     rowners[i] += rowners[i-1];
1539   }
1540   rstart = rowners[rank];
1541   rend   = rowners[rank+1];
1542 
1543   /* distribute row lengths to all processors */
1544   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1545   offlens = ourlens + (rend-rstart);
1546   if (!rank) {
1547     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1548     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1549     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1550     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1551     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1552     PetscFree(sndcounts);
1553   }
1554   else {
1555     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1556   }
1557 
1558   if (!rank) {
1559     /* calculate the number of nonzeros on each processor */
1560     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1561     PetscMemzero(procsnz,size*sizeof(int));
1562     for ( i=0; i<size; i++ ) {
1563       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1564         procsnz[i] += rowlengths[j];
1565       }
1566     }
1567     PetscFree(rowlengths);
1568 
1569     /* determine max buffer needed and allocate it */
1570     maxnz = 0;
1571     for ( i=0; i<size; i++ ) {
1572       maxnz = PetscMax(maxnz,procsnz[i]);
1573     }
1574     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1575 
1576     /* read in my part of the matrix column indices  */
1577     nz = procsnz[0];
1578     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1579     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1580 
1581     /* read in everyone else's column indices and ship them off */
1582     for ( i=1; i<size; i++ ) {
1583       nz = procsnz[i];
1584       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1585       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1586     }
1587     PetscFree(cols);
1588   }
1589   else {
1590     /* determine buffer space needed for message */
1591     nz = 0;
1592     for ( i=0; i<m; i++ ) {
1593       nz += ourlens[i];
1594     }
1595     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1596 
1597     /* receive message of column indices */
1598     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1599     MPI_Get_count(&status,MPI_INT,&maxnz);
1600     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1601   }
1602 
1603   /* loop over local rows, determining the number of off-diagonal entries */
1604   PetscMemzero(offlens,m*sizeof(int));
1605   jj = 0;
1606   for ( i=0; i<m; i++ ) {
1607     for ( j=0; j<ourlens[i]; j++ ) {
1608       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1609       jj++;
1610     }
1611   }
1612 
1613   /* create our matrix; ourlens temporarily holds only the diagonal-block row lengths for preallocation and is restored afterwards */
1614   for ( i=0; i<m; i++ ) {
1615     ourlens[i] -= offlens[i];
1616   }
1617   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1618   A = *newmat;
1619   MatSetOption(A,COLUMNS_SORTED);
1620   for ( i=0; i<m; i++ ) {
1621     ourlens[i] += offlens[i];
1622   }
1623 
1624   if (!rank) {
1625     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1626 
1627     /* read in my part of the matrix numerical values  */
1628     nz = procsnz[0];
1629     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1630 
1631     /* insert into matrix */
1632     jj      = rstart;
1633     smycols = mycols;
1634     svals   = vals;
1635     for ( i=0; i<m; i++ ) {
1636       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1637       smycols += ourlens[i];
1638       svals   += ourlens[i];
1639       jj++;
1640     }
1641 
1642     /* read in the other processors' values and ship them out */
1643     for ( i=1; i<size; i++ ) {
1644       nz = procsnz[i];
1645       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1646       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1647     }
1648     PetscFree(procsnz);
1649   }
1650   else {
1651     /* receive numeric values */
1652     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1653 
1654     /* receive message of values */
1655     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1656     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1657     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1658 
1659     /* insert into matrix */
1660     jj      = rstart;
1661     smycols = mycols;
1662     svals   = vals;
1663     for ( i=0; i<m; i++ ) {
1664       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1665       smycols += ourlens[i];
1666       svals   += ourlens[i];
1667       jj++;
1668     }
1669   }
1670   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1671 
1672   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1673   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1674   return 0;
1675 }
1676