xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 00c0f3e66c6fddb38b0ebee242913ce034d3fa59)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.109 1996/01/12 22:07:09 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the number of global columns in the matrix.
13 */
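/* Illustrative sketch (added comment, not in the original source): with
   Fortran-style one-based indexing (indexshift = -1) and garray = {4,9,13},
   i.e. the global columns that carry nonzeros in the off-diagonal block B,
   the loop below stores colmap[4] = 1, colmap[9] = 2, colmap[13] = 3 and
   leaves every other entry zero.  A later lookup colmap[col] + shift then
   gives the local column index in B, or a negative value for a global column
   not present in B, which MatSetValues_MPIAIJ() uses to detect a new
   off-diagonal nonzero location. */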
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int        ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
43   Scalar     value;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift,roworiented = aij->roworiented;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (im[i] >= rstart && im[i] < rend) {
56       row = im[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (in[j] >= cstart && in[j] < cend){
61           col = in[j] - cstart;
62           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
63           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
64         }
65         else {
66           if (aij->assembled) {
67             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
68             col = aij->colmap[in[j]] + shift;
69             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
70               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
71               col =  in[j];
72             }
73           }
74           else col = in[j];
75           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
76           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
77         }
78       }
79     }
80     else {
81       if (roworiented) {
82         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
83       }
84       else {
85         row = im[i];
86         for ( j=0; j<n; j++ ) {
87           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
88         }
89       }
90     }
91   }
92   return 0;
93 }
94 
95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96 {
97   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100   int        cstart = aij->cstart, cend = aij->cend,row,col;
101   int        shift = C->indexshift;
102 
103   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
104   for ( i=0; i<m; i++ ) {
105     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
106     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
107     if (idxm[i] >= rstart && idxm[i] < rend) {
108       row = idxm[i] - rstart;
109       for ( j=0; j<n; j++ ) {
110         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
111         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
112         if (idxn[j] >= cstart && idxn[j] < cend){
113           col = idxn[j] - cstart;
114           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
115         }
116         else {
117           col = aij->colmap[idxn[j]] + shift;
118           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
119         }
120       }
121     }
122     else {
123       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
124     }
125   }
126   return 0;
127 }
128 
129 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
130 {
131   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
132   MPI_Comm    comm = mat->comm;
133   int         size = aij->size, *owners = aij->rowners;
134   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
135   MPI_Request *send_waits,*recv_waits;
136   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
137   InsertMode  addv;
138   Scalar      *rvalues,*svalues;
139 
140   /* make sure all processors are either in INSERTMODE or ADDMODE */
141   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
142   if (addv == (ADD_VALUES|INSERT_VALUES)) {
143     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
144   }
145   aij->insertmode = addv; /* in case this processor had no cache */
146 
147   /*  first count number of contributors to each processor */
148   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
149   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
150   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
151   for ( i=0; i<aij->stash.n; i++ ) {
152     idx = aij->stash.idx[i];
153     for ( j=0; j<size; j++ ) {
154       if (idx >= owners[j] && idx < owners[j+1]) {
155         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
156       }
157     }
158   }
159   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
160 
161   /* inform other processors of number of messages and max length*/
162   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
163   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
164   nreceives = work[rank];
165   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
166   nmax = work[rank];
167   PetscFree(work);
168 
169   /* post receives:
170        1) each message will consist of ordered triples
171      (global row index, global column index, value); we store the indices
172      as Scalars (doubles) to simplify the message passing.
173        2) since we don't know how long each individual message is we
174      allocate the largest needed buffer for each receive. Potentially
175      this is a lot of wasted space.
176 
177 
178        This could be done better.
179   */
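  /* Packing sketch (added comment, not in the original source): a stashed
     entry with global row 7, global column 3, and value 2.5 that is destined
     for processor p is packed as three consecutive Scalars
         svalues[3*s] = 7.0, svalues[3*s+1] = 3.0, svalues[3*s+2] = 2.5,
     where s is starts[p] at the time the entry is packed (see the loop over
     aij->stash.n below). */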
180   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
181   CHKPTRQ(rvalues);
182   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
183   CHKPTRQ(recv_waits);
184   for ( i=0; i<nreceives; i++ ) {
185     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
186               comm,recv_waits+i);
187   }
188 
189   /* do sends:
190       1) starts[i] gives the starting index in svalues for stuff going to
191          the ith processor
192   */
193   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
194   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
195   CHKPTRQ(send_waits);
196   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
197   starts[0] = 0;
198   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
199   for ( i=0; i<aij->stash.n; i++ ) {
200     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
201     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
202     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
203   }
204   PetscFree(owner);
205   starts[0] = 0;
206   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
207   count = 0;
208   for ( i=0; i<size; i++ ) {
209     if (procs[i]) {
210       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
211                 comm,send_waits+count++);
212     }
213   }
214   PetscFree(starts); PetscFree(nprocs);
215 
216   /* Free cache space */
217   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
218 
219   aij->svalues    = svalues;    aij->rvalues    = rvalues;
220   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
221   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
222   aij->rmax       = nmax;
223 
224   return 0;
225 }
226 extern int MatSetUpMultiply_MPIAIJ(Mat);
227 
228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
229 {
230   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
232   MPI_Status  *send_status,recv_status;
233   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234   int         row,col,other_disassembled,shift = C->indexshift;
235   Scalar      *values,val;
236   InsertMode  addv = aij->insertmode;
237 
238   /*  wait on receives */
239   while (count) {
240     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
241     /* unpack receives into our local space */
242     values = aij->rvalues + 3*imdex*aij->rmax;
243     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
244     n = n/3;
245     for ( i=0; i<n; i++ ) {
246       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
247       col = (int) PETSCREAL(values[3*i+1]);
248       val = values[3*i+2];
249       if (col >= aij->cstart && col < aij->cend) {
250         col -= aij->cstart;
251         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
252       }
253       else {
254         if (aij->assembled) {
255           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256           col = aij->colmap[col] + shift;
257           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
258             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259             col = (int) PETSCREAL(values[3*i+1]);
260           }
261         }
262         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
263       }
264     }
265     count--;
266   }
267   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
268 
269   /* wait on sends */
270   if (aij->nsends) {
271     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
272     CHKPTRQ(send_status);
273     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
274     PetscFree(send_status);
275   }
276   PetscFree(aij->send_waits); PetscFree(aij->svalues);
277 
278   aij->insertmode = NOT_SET_VALUES;
279   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
280   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
281 
282   /* determine if any processor has disassembled, if so we must
283      also disassemble ourselves, in order that we may reassemble. */
284   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285   if (aij->assembled && !other_disassembled) {
286     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
287   }
288 
289   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
290     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
291     aij->assembled = 1;
292   }
293   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
294   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
295 
296   return 0;
297 }
298 
299 static int MatZeroEntries_MPIAIJ(Mat A)
300 {
301   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302   int        ierr;
303   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
304   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
305   return 0;
306 }
307 
308 /* the code does not do the diagonal entries correctly unless the
309    matrix is square and the column and row ownerships are identical.
310    This is a BUG. The only way to fix it seems to be to access
311    aij->A and aij->B directly and not through the MatZeroRows()
312    routine.
313 */
314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
315 {
316   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
317   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
318   int            *procs,*nprocs,j,found,idx,nsends,*work;
319   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
320   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321   int            *lens,imdex,*lrows,*values;
322   MPI_Comm       comm = A->comm;
323   MPI_Request    *send_waits,*recv_waits;
324   MPI_Status     recv_status,*send_status;
325   IS             istmp;
326 
327   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
328   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
329   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
330 
331   /*  first count number of contributors to each processor */
332   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
333   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
334   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
335   for ( i=0; i<N; i++ ) {
336     idx = rows[i];
337     found = 0;
338     for ( j=0; j<size; j++ ) {
339       if (idx >= owners[j] && idx < owners[j+1]) {
340         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
341       }
342     }
343     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
344   }
345   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
346 
347   /* inform other processors of number of messages and max length*/
348   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
349   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
350   nrecvs = work[rank];
351   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
352   nmax = work[rank];
353   PetscFree(work);
354 
355   /* post receives:   */
356   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
357   CHKPTRQ(rvalues);
358   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
359   CHKPTRQ(recv_waits);
360   for ( i=0; i<nrecvs; i++ ) {
361     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
362   }
363 
364   /* do sends:
365       1) starts[i] gives the starting index in svalues for stuff going to
366          the ith processor
367   */
368   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
369   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
370   CHKPTRQ(send_waits);
371   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
372   starts[0] = 0;
373   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
374   for ( i=0; i<N; i++ ) {
375     svalues[starts[owner[i]]++] = rows[i];
376   }
377   ISRestoreIndices(is,&rows);
378 
379   starts[0] = 0;
380   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
381   count = 0;
382   for ( i=0; i<size; i++ ) {
383     if (procs[i]) {
384       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
385     }
386   }
387   PetscFree(starts);
388 
389   base = owners[rank];
390 
391   /*  wait on receives */
392   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
393   source = lens + nrecvs;
394   count  = nrecvs; slen = 0;
395   while (count) {
396     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
397     /* unpack receives into our local space */
398     MPI_Get_count(&recv_status,MPI_INT,&n);
399     source[imdex]  = recv_status.MPI_SOURCE;
400     lens[imdex]  = n;
401     slen += n;
402     count--;
403   }
404   PetscFree(recv_waits);
405 
406   /* move the data into the send scatter */
407   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
408   count = 0;
409   for ( i=0; i<nrecvs; i++ ) {
410     values = rvalues + i*nmax;
411     for ( j=0; j<lens[i]; j++ ) {
412       lrows[count++] = values[j] - base;
413     }
414   }
415   PetscFree(rvalues); PetscFree(lens);
416   PetscFree(owner); PetscFree(nprocs);
417 
418   /* actually zap the local rows */
419   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
420   PLogObjectParent(A,istmp);
421   PetscFree(lrows);
422   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
423   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
424   ierr = ISDestroy(istmp); CHKERRQ(ierr);
425 
426   /* wait on sends */
427   if (nsends) {
428     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
429     CHKPTRQ(send_status);
430     MPI_Waitall(nsends,send_waits,send_status);
431     PetscFree(send_status);
432   }
433   PetscFree(send_waits); PetscFree(svalues);
434 
435   return 0;
436 }
437 
438 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
439 {
440   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
441   int        ierr;
442 
443   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
444   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
445   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
446   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
448   return 0;
449 }
450 
451 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
452 {
453   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
454   int        ierr;
455   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
456   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
457   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
458   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
459   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
460   return 0;
461 }
462 
463 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
464 {
465   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
466   int        ierr;
467 
468   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
469   /* do nondiagonal part */
470   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
471   /* send it on its way */
472   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
473                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
474   /* do local part */
475   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
476   /* receive remote parts: note this assumes the values are not actually */
477   /* inserted in yy until the next line, which is true for my implementation*/
478   /* but is not perhaps always true. */
479   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
480                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
481   return 0;
482 }
483 
484 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487   int        ierr;
488 
489   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
490   /* do nondiagonal part */
491   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
492   /* send it on its way */
493   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
494                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495   /* do local part */
496   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
497   /* receive remote parts: note this assumes the values are not actually */
498   /* inserted in zz until the next line, which is true for my implementation*/
499   /* but is not perhaps always true. */
500   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
501                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
502   return 0;
503 }
504 
505 /*
506   This only works correctly for square matrices where the subblock A->A is the
507    diagonal block
508 */
509 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
510 {
511   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
513   return MatGetDiagonal(a->A,v);
514 }
515 
516 static int MatDestroy_MPIAIJ(PetscObject obj)
517 {
518   Mat        mat = (Mat) obj;
519   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
520   int        ierr;
521 #if defined(PETSC_LOG)
522   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
523 #endif
524   PetscFree(aij->rowners);
525   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
526   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
527   if (aij->colmap) PetscFree(aij->colmap);
528   if (aij->garray) PetscFree(aij->garray);
529   if (aij->lvec)   VecDestroy(aij->lvec);
530   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
531   PetscFree(aij);
532   PLogObjectDestroy(mat);
533   PetscHeaderDestroy(mat);
534   return 0;
535 }
536 #include "draw.h"
537 #include "pinclude/pviewer.h"
538 
539 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
540 {
541   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
542   int         ierr;
543 
544   if (aij->size == 1) {
545     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
546   }
547   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
548   return 0;
549 }
550 
551 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
552 {
553   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
554   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
555   int         ierr, format,shift = C->indexshift,rank;
556   PetscObject vobj = (PetscObject) viewer;
557   FILE        *fd;
558 
559   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
560     ierr = ViewerFileGetFormat_Private(viewer,&format);
561     if (format == FILE_FORMAT_INFO_DETAILED) {
562       int nz,nzalloc,mem;
563       MPI_Comm_rank(mat->comm,&rank);
564       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
565       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
566       MPIU_Seq_begin(mat->comm,1);
567       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
568                 nzalloc,mem);
569       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
570       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
571       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
572       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
573       fflush(fd);
574       MPIU_Seq_end(mat->comm,1);
575       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
576       return 0;
577     }
578     else if (format == FILE_FORMAT_INFO) {
579       return 0;
580     }
581   }
582 
583   if (vobj->type == ASCII_FILE_VIEWER) {
584     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
585     MPIU_Seq_begin(mat->comm,1);
586     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
587            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
588            aij->cend);
589     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
590     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
591     fflush(fd);
592     MPIU_Seq_end(mat->comm,1);
593   }
594   else {
595     int size = aij->size;
596     rank = aij->rank;
597     if (size == 1) {
598       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
599     }
600     else {
601       /* assemble the entire matrix onto first processor. */
602       Mat         A;
603       Mat_SeqAIJ *Aloc;
604       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
605       Scalar      *a;
606 
607       if (!rank) {
608         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
609                CHKERRQ(ierr);
610       }
611       else {
612         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
613                CHKERRQ(ierr);
614       }
615       PLogObjectParent(mat,A);
616 
617       /* copy over the A part */
618       Aloc = (Mat_SeqAIJ*) aij->A->data;
619       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
620       row = aij->rstart;
621       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
622       for ( i=0; i<m; i++ ) {
623         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
624         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
625       }
626       aj = Aloc->j;
627       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
628 
629       /* copy over the B part */
630       Aloc = (Mat_SeqAIJ*) aij->B->data;
631       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
632       row = aij->rstart;
633       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
634       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
635       for ( i=0; i<m; i++ ) {
636         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
637         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
638       }
639       PetscFree(ct);
640       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
641       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
642       if (!rank) {
643         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
644       }
645       ierr = MatDestroy(A); CHKERRQ(ierr);
646     }
647   }
648   return 0;
649 }
650 
651 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
652 {
653   Mat         mat = (Mat) obj;
654   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
655   int         ierr;
656   PetscObject vobj = (PetscObject) viewer;
657 
658   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
659   if (!viewer) {
660     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
661   }
662   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
663   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
664     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
665   }
666   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
667     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
668   }
669   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
670     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
671   }
672   else if (vobj->cookie == DRAW_COOKIE) {
673     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
674   }
675   else if (vobj->type == BINARY_FILE_VIEWER) {
676     return MatView_MPIAIJ_Binary(mat,viewer);
677   }
678   return 0;
679 }
680 
681 extern int MatMarkDiag_SeqAIJ(Mat);
682 /*
683     This has to provide several versions.
684 
685      1) per sequential
686      2) a) use only local smoothing updating outer values only once.
687         b) local smoothing updating outer values each inner iteration
688      3) color updating out values betwen colors.
689 */
690 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
691                            double fshift,int its,Vec xx)
692 {
693   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
694   Mat        AA = mat->A, BB = mat->B;
695   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
696   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
697   int        ierr,*idx, *diag;
698   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
699   Vec        tt;
700 
701   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
702 
703   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
704   xs = x + shift; /* shift by one for index start of 1 */
705   ls = ls + shift;
706   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
707   diag = A->diag;
708   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
709     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
710   }
711   if (flag & SOR_EISENSTAT) {
712     /* Let  A = L + U + D; where L is lower triangular,
713     U is upper triangular, D is diagonal; This routine applies
714 
715             (L + E)^{-1} A (U + E)^{-1}
716 
717     to a vector efficiently using Eisenstat's trick. This is for
718     the case of SSOR preconditioner, so E is D/omega where omega
719     is the relaxation factor.
720     */
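    /* Sketch of the algebra behind the three steps below (added for
       reference; it assumes fshift = 0 and E = D/omega):
            step 1:  x = (E+U)^{-1} b
            step 2:  t = b - (2E-D) x     (scale = 2/omega - 1, so scale*D = 2E-D)
            step 3:  t = (E+L)^{-1} t,  then  x = x + t
       so that
            x + t = (E+U)^{-1} b + (E+L)^{-1} [ b - (2E-D)(E+U)^{-1} b ]
                  = (E+L)^{-1} [ (E+L) + (E+U) - (2E-D) ] (E+U)^{-1} b
                  = (E+L)^{-1} (L + U + D) (E+U)^{-1} b
                  = (E+L)^{-1} A (E+U)^{-1} b.  */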
721     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
722     PLogObjectParent(matin,tt);
723     VecGetArray(tt,&t);
724     scale = (2.0/omega) - 1.0;
725     /*  x = (E + U)^{-1} b */
726     VecSet(&zero,mat->lvec);
727     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
728                               mat->Mvctx); CHKERRQ(ierr);
729     for ( i=m-1; i>-1; i-- ) {
730       n    = A->i[i+1] - diag[i] - 1;
731       idx  = A->j + diag[i] + !shift;
732       v    = A->a + diag[i] + !shift;
733       sum  = b[i];
734       SPARSEDENSEMDOT(sum,xs,v,idx,n);
735       d    = fshift + A->a[diag[i]+shift];
736       n    = B->i[i+1] - B->i[i];
737       idx  = B->j + B->i[i] + shift;
738       v    = B->a + B->i[i] + shift;
739       SPARSEDENSEMDOT(sum,ls,v,idx,n);
740       x[i] = omega*(sum/d);
741     }
742     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
743                             mat->Mvctx); CHKERRQ(ierr);
744 
745     /*  t = b - (2*E - D)x */
746     v = A->a;
747     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
748 
749     /*  t = (E + L)^{-1}t */
750     ts = t + shift; /* shifted by one for index start of 1 in A->a and A->j */
751     diag = A->diag;
752     VecSet(&zero,mat->lvec);
753     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
754                                                  mat->Mvctx); CHKERRQ(ierr);
755     for ( i=0; i<m; i++ ) {
756       n    = diag[i] - A->i[i];
757       idx  = A->j + A->i[i] + shift;
758       v    = A->a + A->i[i] + shift;
759       sum  = t[i];
760       SPARSEDENSEMDOT(sum,ts,v,idx,n);
761       d    = fshift + A->a[diag[i]+shift];
762       n    = B->i[i+1] - B->i[i];
763       idx  = B->j + B->i[i] + shift;
764       v    = B->a + B->i[i] + shift;
765       SPARSEDENSEMDOT(sum,ls,v,idx,n);
766       t[i] = omega*(sum/d);
767     }
768     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
769                                                     mat->Mvctx); CHKERRQ(ierr);
770     /*  x = x + t */
771     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
772     VecDestroy(tt);
773     return 0;
774   }
775 
776 
777   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
778     if (flag & SOR_ZERO_INITIAL_GUESS) {
779       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
780     }
781     else {
782       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
783       CHKERRQ(ierr);
784       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
785       CHKERRQ(ierr);
786     }
787     while (its--) {
788       /* go down through the rows */
789       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
790                               mat->Mvctx); CHKERRQ(ierr);
791       for ( i=0; i<m; i++ ) {
792         n    = A->i[i+1] - A->i[i];
793         idx  = A->j + A->i[i] + shift;
794         v    = A->a + A->i[i] + shift;
795         sum  = b[i];
796         SPARSEDENSEMDOT(sum,xs,v,idx,n);
797         d    = fshift + A->a[diag[i]+shift];
798         n    = B->i[i+1] - B->i[i];
799         idx  = B->j + B->i[i] + shift;
800         v    = B->a + B->i[i] + shift;
801         SPARSEDENSEMDOT(sum,ls,v,idx,n);
802         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
803       }
804       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
805                             mat->Mvctx); CHKERRQ(ierr);
806       /* come up through the rows */
807       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
808                               mat->Mvctx); CHKERRQ(ierr);
809       for ( i=m-1; i>-1; i-- ) {
810         n    = A->i[i+1] - A->i[i];
811         idx  = A->j + A->i[i] + shift;
812         v    = A->a + A->i[i] + shift;
813         sum  = b[i];
814         SPARSEDENSEMDOT(sum,xs,v,idx,n);
815         d    = fshift + A->a[diag[i]+shift];
816         n    = B->i[i+1] - B->i[i];
817         idx  = B->j + B->i[i] + shift;
818         v    = B->a + B->i[i] + shift;
819         SPARSEDENSEMDOT(sum,ls,v,idx,n);
820         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
821       }
822       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
823                             mat->Mvctx); CHKERRQ(ierr);
824     }
825   }
826   else if (flag & SOR_FORWARD_SWEEP){
827     if (flag & SOR_ZERO_INITIAL_GUESS) {
828       VecSet(&zero,mat->lvec);
829       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
830                               mat->Mvctx); CHKERRQ(ierr);
831       for ( i=0; i<m; i++ ) {
832         n    = diag[i] - A->i[i];
833         idx  = A->j + A->i[i] + shift;
834         v    = A->a + A->i[i] + shift;
835         sum  = b[i];
836         SPARSEDENSEMDOT(sum,xs,v,idx,n);
837         d    = fshift + A->a[diag[i]+shift];
838         n    = B->i[i+1] - B->i[i];
839         idx  = B->j + B->i[i] + shift;
840         v    = B->a + B->i[i] + shift;
841         SPARSEDENSEMDOT(sum,ls,v,idx,n);
842         x[i] = omega*(sum/d);
843       }
844       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
845                             mat->Mvctx); CHKERRQ(ierr);
846       its--;
847     }
848     while (its--) {
849       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
850       CHKERRQ(ierr);
851       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
852       CHKERRQ(ierr);
853       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
854                               mat->Mvctx); CHKERRQ(ierr);
855       for ( i=0; i<m; i++ ) {
856         n    = A->i[i+1] - A->i[i];
857         idx  = A->j + A->i[i] + shift;
858         v    = A->a + A->i[i] + shift;
859         sum  = b[i];
860         SPARSEDENSEMDOT(sum,xs,v,idx,n);
861         d    = fshift + A->a[diag[i]+shift];
862         n    = B->i[i+1] - B->i[i];
863         idx  = B->j + B->i[i] + shift;
864         v    = B->a + B->i[i] + shift;
865         SPARSEDENSEMDOT(sum,ls,v,idx,n);
866         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
867       }
868       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
869                             mat->Mvctx); CHKERRQ(ierr);
870     }
871   }
872   else if (flag & SOR_BACKWARD_SWEEP){
873     if (flag & SOR_ZERO_INITIAL_GUESS) {
874       VecSet(&zero,mat->lvec);
875       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
876                               mat->Mvctx); CHKERRQ(ierr);
877       for ( i=m-1; i>-1; i-- ) {
878         n    = A->i[i+1] - diag[i] - 1;
879         idx  = A->j + diag[i] + !shift;
880         v    = A->a + diag[i] + !shift;
881         sum  = b[i];
882         SPARSEDENSEMDOT(sum,xs,v,idx,n);
883         d    = fshift + A->a[diag[i]+shift];
884         n    = B->i[i+1] - B->i[i];
885         idx  = B->j + B->i[i] + shift;
886         v    = B->a + B->i[i] + shift;
887         SPARSEDENSEMDOT(sum,ls,v,idx,n);
888         x[i] = omega*(sum/d);
889       }
890       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
891                             mat->Mvctx); CHKERRQ(ierr);
892       its--;
893     }
894     while (its--) {
895       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
896                             mat->Mvctx); CHKERRQ(ierr);
897       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
898                             mat->Mvctx); CHKERRQ(ierr);
899       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
900                               mat->Mvctx); CHKERRQ(ierr);
901       for ( i=m-1; i>-1; i-- ) {
902         n    = A->i[i+1] - A->i[i];
903         idx  = A->j + A->i[i] + shift;
904         v    = A->a + A->i[i] + shift;
905         sum  = b[i];
906         SPARSEDENSEMDOT(sum,xs,v,idx,n);
907         d    = fshift + A->a[diag[i]+shift];
908         n    = B->i[i+1] - B->i[i];
909         idx  = B->j + B->i[i] + shift;
910         v    = B->a + B->i[i] + shift;
911         SPARSEDENSEMDOT(sum,ls,v,idx,n);
912         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
913       }
914       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
915                             mat->Mvctx); CHKERRQ(ierr);
916     }
917   }
918   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
919     if (flag & SOR_ZERO_INITIAL_GUESS) {
920       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
921     }
922     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
923     CHKERRQ(ierr);
924     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
925     CHKERRQ(ierr);
926     while (its--) {
927       /* go down through the rows */
928       for ( i=0; i<m; i++ ) {
929         n    = A->i[i+1] - A->i[i];
930         idx  = A->j + A->i[i] + shift;
931         v    = A->a + A->i[i] + shift;
932         sum  = b[i];
933         SPARSEDENSEMDOT(sum,xs,v,idx,n);
934         d    = fshift + A->a[diag[i]+shift];
935         n    = B->i[i+1] - B->i[i];
936         idx  = B->j + B->i[i] + shift;
937         v    = B->a + B->i[i] + shift;
938         SPARSEDENSEMDOT(sum,ls,v,idx,n);
939         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
940       }
941       /* come up through the rows */
942       for ( i=m-1; i>-1; i-- ) {
943         n    = A->i[i+1] - A->i[i];
944         idx  = A->j + A->i[i] + shift;
945         v    = A->a + A->i[i] + shift;
946         sum  = b[i];
947         SPARSEDENSEMDOT(sum,xs,v,idx,n);
948         d    = fshift + A->a[diag[i]+shift];
949         n    = B->i[i+1] - B->i[i];
950         idx  = B->j + B->i[i] + shift;
951         v    = B->a + B->i[i] + shift;
952         SPARSEDENSEMDOT(sum,ls,v,idx,n);
953         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
954       }
955     }
956   }
957   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
958     if (flag & SOR_ZERO_INITIAL_GUESS) {
959       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
960     }
961     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
962     CHKERRQ(ierr);
963     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
964     CHKERRQ(ierr);
965     while (its--) {
966       for ( i=0; i<m; i++ ) {
967         n    = A->i[i+1] - A->i[i];
968         idx  = A->j + A->i[i] + shift;
969         v    = A->a + A->i[i] + shift;
970         sum  = b[i];
971         SPARSEDENSEMDOT(sum,xs,v,idx,n);
972         d    = fshift + A->a[diag[i]+shift];
973         n    = B->i[i+1] - B->i[i];
974         idx  = B->j + B->i[i] + shift;
975         v    = B->a + B->i[i] + shift;
976         SPARSEDENSEMDOT(sum,ls,v,idx,n);
977         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
978       }
979     }
980   }
981   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
982     if (flag & SOR_ZERO_INITIAL_GUESS) {
983       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
984     }
985     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
986                             mat->Mvctx); CHKERRQ(ierr);
987     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
988                             mat->Mvctx); CHKERRQ(ierr);
989     while (its--) {
990       for ( i=m-1; i>-1; i-- ) {
991         n    = A->i[i+1] - A->i[i];
992         idx  = A->j + A->i[i] + shift;
993         v    = A->a + A->i[i] + shift;
994         sum  = b[i];
995         SPARSEDENSEMDOT(sum,xs,v,idx,n);
996         d    = fshift + A->a[diag[i]+shift];
997         n    = B->i[i+1] - B->i[i];
998         idx  = B->j + B->i[i] + shift;
999         v    = B->a + B->i[i] + shift;
1000         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1001         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1002       }
1003     }
1004   }
1005   return 0;
1006 }
1007 
1008 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1009                              int *nzalloc,int *mem)
1010 {
1011   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1012   Mat        A = mat->A, B = mat->B;
1013   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1014 
1015   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1016   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1017   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1018   if (flag == MAT_LOCAL) {
1019     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1020   } else if (flag == MAT_GLOBAL_MAX) {
1021     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1022     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1023   } else if (flag == MAT_GLOBAL_SUM) {
1024     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1025     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1026   }
1027   return 0;
1028 }
1029 
1030 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1031 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1032 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1033 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1034 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1035 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1036 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1037 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1038 
1039 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1040 {
1041   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1042 
1043   if (op == NO_NEW_NONZERO_LOCATIONS ||
1044       op == YES_NEW_NONZERO_LOCATIONS ||
1045       op == COLUMNS_SORTED ||
1046       op == ROW_ORIENTED) {
1047         MatSetOption(a->A,op);
1048         MatSetOption(a->B,op);
1049   }
1050   else if (op == ROWS_SORTED ||
1051            op == SYMMETRIC_MATRIX ||
1052            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1053            op == YES_NEW_DIAGONALS)
1054     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1055   else if (op == COLUMN_ORIENTED) {
1056     a->roworiented = 0;
1057     MatSetOption(a->A,op);
1058     MatSetOption(a->B,op);
1059   }
1060   else if (op == NO_NEW_DIAGONALS)
1061     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1062   else
1063     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1064   return 0;
1065 }
1066 
1067 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1068 {
1069   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1070   *m = mat->M; *n = mat->N;
1071   return 0;
1072 }
1073 
1074 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1075 {
1076   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1077   *m = mat->m; *n = mat->n;
1078   return 0;
1079 }
1080 
1081 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1084   *m = mat->rstart; *n = mat->rend;
1085   return 0;
1086 }
1087 
1088 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1089 {
1090   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1091   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1092   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1093   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1094 
1095   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1096   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1097   lrow = row - rstart;
1098 
1099   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1100   if (!v)   {pvA = 0; pvB = 0;}
1101   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1102   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1103   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1104   nztot = nzA + nzB;
1105 
1106   if (v  || idx) {
1107     if (nztot) {
1108       /* Sort by increasing column numbers, assuming A and B already sorted */
1109       int imark;
1110       if (mat->assembled) {
1111         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1112       }
1113       if (v) {
1114         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1115         for ( i=0; i<nzB; i++ ) {
1116           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1117           else break;
1118         }
1119         imark = i;
1120         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1121         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1122       }
1123       if (idx) {
1124         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1125         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1126         for ( i=0; i<nzB; i++ ) {
1127           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1128           else break;
1129         }
1130         imark = i;
1131         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1132         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1133       }
1134     }
1135     else {*idx = 0; *v=0;}
1136   }
1137   *nz = nztot;
1138   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1139   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1140   return 0;
1141 }
1142 
1143 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1144 {
1145   if (idx) PetscFree(*idx);
1146   if (v) PetscFree(*v);
1147   return 0;
1148 }
1149 
1150 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1151 {
1152   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1153   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1154   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1155   double     sum = 0.0;
1156   Scalar     *v;
1157 
1158   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1159   if (aij->size == 1) {
1160     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1161   } else {
1162     if (type == NORM_FROBENIUS) {
1163       v = amat->a;
1164       for (i=0; i<amat->nz; i++ ) {
1165 #if defined(PETSC_COMPLEX)
1166         sum += real(conj(*v)*(*v)); v++;
1167 #else
1168         sum += (*v)*(*v); v++;
1169 #endif
1170       }
1171       v = bmat->a;
1172       for (i=0; i<bmat->nz; i++ ) {
1173 #if defined(PETSC_COMPLEX)
1174         sum += real(conj(*v)*(*v)); v++;
1175 #else
1176         sum += (*v)*(*v); v++;
1177 #endif
1178       }
1179       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1180       *norm = sqrt(*norm);
1181     }
1182     else if (type == NORM_1) { /* max column norm */
1183       double *tmp, *tmp2;
1184       int    *jj, *garray = aij->garray;
1185       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1186       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1187       PetscMemzero(tmp,aij->N*sizeof(double));
1188       *norm = 0.0;
1189       v = amat->a; jj = amat->j;
1190       for ( j=0; j<amat->nz; j++ ) {
1191         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1192       }
1193       v = bmat->a; jj = bmat->j;
1194       for ( j=0; j<bmat->nz; j++ ) {
1195         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1196       }
1197       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1198       for ( j=0; j<aij->N; j++ ) {
1199         if (tmp2[j] > *norm) *norm = tmp2[j];
1200       }
1201       PetscFree(tmp); PetscFree(tmp2);
1202     }
1203     else if (type == NORM_INFINITY) { /* max row norm */
1204       double ntemp = 0.0;
1205       for ( j=0; j<amat->m; j++ ) {
1206         v = amat->a + amat->i[j] + shift;
1207         sum = 0.0;
1208         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1209           sum += PetscAbsScalar(*v); v++;
1210         }
1211         v = bmat->a + bmat->i[j] + shift;
1212         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1213           sum += PetscAbsScalar(*v); v++;
1214         }
1215         if (sum > ntemp) ntemp = sum;
1216       }
1217       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1218     }
1219     else {
1220       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1221     }
1222   }
1223   return 0;
1224 }
1225 
1226 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1227 {
1228   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1229   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1230   int        ierr,shift = Aloc->indexshift;
1231   Mat        B;
1232   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1233   Scalar     *array;
1234 
1235   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1236   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1237          PETSC_NULL,&B); CHKERRQ(ierr);
1238 
1239   /* copy over the A part */
1240   Aloc = (Mat_SeqAIJ*) a->A->data;
1241   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1242   row = a->rstart;
1243   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1244   for ( i=0; i<m; i++ ) {
1245     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1246     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1247   }
1248   aj = Aloc->j;
1249   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1250 
1251   /* copy over the B part */
1252   Aloc = (Mat_SeqAIJ*) a->B->data;
1253   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1254   row = a->rstart;
1255   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1256   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1257   for ( i=0; i<m; i++ ) {
1258     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1259     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1260   }
1261   PetscFree(ct);
1262   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1263   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1264   if (matout) {
1265     *matout = B;
1266   } else {
1267     /* This isn't really an in-place transpose, but we free the data structures from a */
1268     PetscFree(a->rowners);
1269     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1270     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1271     if (a->colmap) PetscFree(a->colmap);
1272     if (a->garray) PetscFree(a->garray);
1273     if (a->lvec) VecDestroy(a->lvec);
1274     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1275     PetscFree(a);
1276     PetscMemcpy(A,B,sizeof(struct _Mat));
1277     PetscHeaderDestroy(B);
1278   }
1279   return 0;
1280 }
1281 
1282 extern int MatPrintHelp_SeqAIJ(Mat);
1283 static int MatPrintHelp_MPIAIJ(Mat A)
1284 {
1285   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1286 
1287   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1288   else return 0;
1289 }
1290 
1291 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1292 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1293 
1294 /* -------------------------------------------------------------------*/
1295 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1296        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1297        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1298        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1299        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1300        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1301        MatLUFactor_MPIAIJ,0,
1302        MatRelax_MPIAIJ,
1303        MatTranspose_MPIAIJ,
1304        MatGetInfo_MPIAIJ,0,
1305        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1306        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1307        0,
1308        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1309        MatGetReordering_MPIAIJ,
1310        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1311        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1312        MatILUFactorSymbolic_MPIAIJ,0,
1313        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1314        0,0,0,
1315        0,0,MatGetValues_MPIAIJ,0,
1316        MatPrintHelp_MPIAIJ};
1317 
1318 /*@C
1319    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1320    (the default parallel PETSc format).
1321 
1322    Input Parameters:
1323 .  comm - MPI communicator
1324 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1325 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1326            if N is given)
1327 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1328 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1329            if n is given)
1330 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1331            (same for all local rows)
1332 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1333            or null (possibly different for each row).  You must leave room
1334            for the diagonal entry even if it is zero.
1335 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1336            submatrix (same for all local rows).
1337 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1338            submatrix or null (possibly different for each row).
1339 
1340    Output Parameter:
1341 .  newmat - the matrix
1342 
1343    Notes:
1344    The AIJ format (also called the Yale sparse matrix format or
1345    compressed row storage) is fully compatible with standard Fortran 77
1346    storage.  That is, the stored row and column indices can begin at
1347    either one (as in Fortran) or zero.  See the users manual for details.
1348 
1349    The user MUST specify either the local or global matrix dimensions
1350    (possibly both).
1351 
1352    Storage Information:
1353    For a square global matrix we define each processor's diagonal portion
1354    to be its local rows and the corresponding columns (a square submatrix);
1355    each processor's off-diagonal portion encompasses the remainder of the
1356    local matrix (a rectangular submatrix).
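
   For example (an illustration added here, not in the original text): with
   two processors and a 4x4 matrix whose rows 0-1 are owned by processor 0
   and rows 2-3 by processor 1, processor 0's diagonal portion is the 2x2
   submatrix formed by rows 0-1 and columns 0-1, while its off-diagonal
   portion is the 2x2 submatrix formed by rows 0-1 and columns 2-3.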
1357 
1358    The user can specify preallocated storage for the diagonal part of
1359    the local submatrix with either d_nz or d_nnz (not both).  Set
1360    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1361    memory allocation.  Likewise, specify preallocated storage for the
1362    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1363 
1364    By default, this format uses inodes (identical nodes) when possible.
1365    We search for consecutive rows with the same nonzero structure, thereby
1366    reusing matrix information to achieve increased efficiency.
1367 
1368    Options Database Keys:
1369 $    -mat_aij_no_inode  - Do not use inodes
1370 $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
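
   Example Usage (a minimal sketch, not taken from the PETSc examples; it
   builds a global 8x8 tridiagonal matrix using only routines referenced in
   this file):
$      Mat    A;
$      Scalar v;
$      int    ierr, i, j, Istart, Iend;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,
$                             3,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$      ierr = MatGetOwnershipRange(A,&Istart,&Iend); CHKERRQ(ierr);
$      for ( i=Istart; i<Iend; i++ ) {
$        v = 2.0; ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES); CHKERRQ(ierr);
$        if (i > 0) {j = i-1; v = -1.0;
$          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);}
$        if (i < 7) {j = i+1; v = -1.0;
$          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);}
$      }
$      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);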
1371 
1372 .keywords: matrix, aij, compressed row, sparse, parallel
1373 
1374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1375 @*/
1376 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1377                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
1378 {
1379   Mat          mat;
1380   Mat_MPIAIJ   *a;
1381   int          ierr, i,sum[2],work[2];
1382 
1383   *newmat         = 0;
1384   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1385   PLogObjectCreate(mat);
1386   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1387   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1388   mat->destroy    = MatDestroy_MPIAIJ;
1389   mat->view       = MatView_MPIAIJ;
1390   mat->factor     = 0;
1391 
1392   a->insertmode = NOT_SET_VALUES;
1393   MPI_Comm_rank(comm,&a->rank);
1394   MPI_Comm_size(comm,&a->size);
1395 
1396   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1397     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1398 
1399   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1400     work[0] = m; work[1] = n;
1401     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1402     if (M == PETSC_DECIDE) M = sum[0];
1403     if (N == PETSC_DECIDE) N = sum[1];
1404   }
1405   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1406   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1407   a->m = m;
1408   a->n = n;
1409   a->N = N;
1410   a->M = M;
1411 
1412   /* build local table of row and column ownerships */
1413   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1414   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1415   a->cowners = a->rowners + a->size + 1;
1416   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1417   a->rowners[0] = 0;
1418   for ( i=2; i<=a->size; i++ ) {
1419     a->rowners[i] += a->rowners[i-1];
1420   }
1421   a->rstart = a->rowners[a->rank];
1422   a->rend   = a->rowners[a->rank+1];
1423   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1424   a->cowners[0] = 0;
1425   for ( i=2; i<=a->size; i++ ) {
1426     a->cowners[i] += a->cowners[i-1];
1427   }
1428   a->cstart = a->cowners[a->rank];
1429   a->cend   = a->cowners[a->rank+1];
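
  /*
     A worked example of the ownership tables built above: with 3 processors
     and local row counts m = 3, 2, 3, the Allgather followed by the running
     sum gives rowners = {0,3,5,8}, so processor 1 owns global rows 3 and 4
     (rstart = 3, rend = 5); cowners is formed the same way from the n values.
  */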
1430 
1431   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1432   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1433   PLogObjectParent(mat,a->A);
1434   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1435   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1436   PLogObjectParent(mat,a->B);
1437 
1438   /* build cache (stash) for entries destined for rows owned by other processors */
1439   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1440   a->colmap      = 0;
1441   a->garray      = 0;
1442   a->roworiented = 1;
1443 
1444   /* data used by the matrix-vector multiply: scatter context and local work vector */
1445   a->lvec      = 0;
1446   a->Mvctx     = 0;
1447   a->assembled = 0;
1448 
1449   *newmat = mat;
1450   return 0;
1451 }
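
/*
   A slightly fuller sketch of per-row preallocation with d_nnz/o_nnz, as
   described in the manual page above.  The routine and the nonzero counts
   below are invented for illustration only (a real code would derive the
   counts from its mesh or stencil); the block is guarded from compilation.
*/
#if 0
static int MatCreateMPIAIJExample_Private(MPI_Comm comm,int m,int N,Mat *C)
{
  int ierr,i,*d_nnz,*o_nnz;

  d_nnz = (int *) PetscMalloc(2*m*sizeof(int)); CHKPTRQ(d_nnz);
  o_nnz = d_nnz + m;
  for ( i=0; i<m; i++ ) {
    d_nnz[i] = 3;                      /* room for the diagonal entry is included */
    o_nnz[i] = (i == 0 || i == m-1);   /* assume coupling to other processors only
                                          at the first and last local rows */
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,
                         0,d_nnz,0,o_nnz,C); CHKERRQ(ierr);
  PetscFree(d_nnz);
  return 0;
}
#endif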
1452 
1453 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1454 {
1455   Mat        mat;
1456   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1457   int        ierr, len,flg;
1458 
1459   if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix");
1460   *newmat       = 0;
1461   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1462   PLogObjectCreate(mat);
1463   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1464   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1465   mat->destroy  = MatDestroy_MPIAIJ;
1466   mat->view     = MatView_MPIAIJ;
1467   mat->factor   = matin->factor;
1468 
1469   a->m          = oldmat->m;
1470   a->n          = oldmat->n;
1471   a->M          = oldmat->M;
1472   a->N          = oldmat->N;
1473 
1474   a->assembled  = 1;
1475   a->rstart     = oldmat->rstart;
1476   a->rend       = oldmat->rend;
1477   a->cstart     = oldmat->cstart;
1478   a->cend       = oldmat->cend;
1479   a->size       = oldmat->size;
1480   a->rank       = oldmat->rank;
1481   a->insertmode = NOT_SET_VALUES;
1482 
1483   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1484   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1485   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1486   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1487   if (oldmat->colmap) {
1488     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1489     PLogObjectMemory(mat,(a->N)*sizeof(int));
1490     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1491   } else a->colmap = 0;
1492   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1493     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1494     PLogObjectMemory(mat,len*sizeof(int));
1495     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1496   } else a->garray = 0;
1497 
1498   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1499   PLogObjectParent(mat,a->lvec);
1500   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1501   PLogObjectParent(mat,a->Mvctx);
1502   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1503   PLogObjectParent(mat,a->A);
1504   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1505   PLogObjectParent(mat,a->B);
1506   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1507   if (flg) {
1508     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1509   }
1510   *newmat = mat;
1511   return 0;
1512 }
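
/*
   MatConvertSameType_MPIAIJ() is ordinarily reached through MatConvert() with
   the MATSAME type, which duplicates an assembled parallel AIJ matrix.  A
   minimal sketch (A is an existing assembled MATMPIAIJ matrix):

       Mat B;
       ierr = MatConvert(A,MATSAME,&B); CHKERRQ(ierr);
*/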
1513 
1514 #include "sysio.h"
1515 
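/*
   Binary file layout read by this routine: a 4-int header (MAT_COOKIE, M, N,
   and a fourth entry that is not used here), then the M row lengths, then all
   column indices, then all numerical values.  Only processor 0 reads the
   file; the other processors receive their pieces in MPI messages.
*/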
1516 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1517 {
1518   Mat          A;
1519   int          i, nz, ierr, j,rstart, rend, fd;
1520   Scalar       *vals,*svals;
1521   PetscObject  vobj = (PetscObject) bview;
1522   MPI_Comm     comm = vobj->comm;
1523   MPI_Status   status;
1524   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1525   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1526   int          tag = ((PetscObject)bview)->tag;
1527 
1528   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1529   if (!rank) {
1530     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1531     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1532     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1533   }
1534 
1535   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1536   M = header[1]; N = header[2];
1537   /* determine ownership of all rows */
1538   m = M/size + ((M % size) > rank);
1539   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1540   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1541   rowners[0] = 0;
1542   for ( i=2; i<=size; i++ ) {
1543     rowners[i] += rowners[i-1];
1544   }
1545   rstart = rowners[rank];
1546   rend   = rowners[rank+1];
1547 
1548   /* distribute row lengths to all processors */
1549   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1550   offlens = ourlens + (rend-rstart);
1551   if (!rank) {
1552     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1553     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1554     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1555     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1556     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1557     PetscFree(sndcounts);
1558   }
1559   else {
1560     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1561   }
1562 
1563   if (!rank) {
1564     /* calculate the number of nonzeros on each processor */
1565     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1566     PetscMemzero(procsnz,size*sizeof(int));
1567     for ( i=0; i<size; i++ ) {
1568       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1569         procsnz[i] += rowlengths[j];
1570       }
1571     }
1572     PetscFree(rowlengths);
1573 
1574     /* determine max buffer needed and allocate it */
1575     maxnz = 0;
1576     for ( i=0; i<size; i++ ) {
1577       maxnz = PetscMax(maxnz,procsnz[i]);
1578     }
1579     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1580 
1581     /* read in my part of the matrix column indices  */
1582     nz = procsnz[0];
1583     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1584     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1585 
1586     /* read in everyone else's part and ship it off */
1587     for ( i=1; i<size; i++ ) {
1588       nz = procsnz[i];
1589       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1590       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1591     }
1592     PetscFree(cols);
1593   }
1594   else {
1595     /* determine buffer space needed for message */
1596     nz = 0;
1597     for ( i=0; i<m; i++ ) {
1598       nz += ourlens[i];
1599     }
1600     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1601 
1602     /* receive message of column indices */
1603     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1604     MPI_Get_count(&status,MPI_INT,&maxnz);
1605     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1606   }
1607 
1608   /* loop over local rows, determining number of off-diagonal entries */
1609   PetscMemzero(offlens,m*sizeof(int));
1610   jj = 0;
1611   for ( i=0; i<m; i++ ) {
1612     for ( j=0; j<ourlens[i]; j++ ) {
1613       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1614       jj++;
1615     }
1616   }
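
  /*
     Example of the split computed above: if this processor owns rows 3
     through 7 (rstart = 3, rend = 8) and one of its rows has column indices
     {1,4,9}, then column 4 lies in the diagonal block while columns 1 and 9
     are counted in offlens[] as off-diagonal entries.
  */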
1617 
1618   /* create our matrix */
1619   for ( i=0; i<m; i++ ) {
1620     ourlens[i] -= offlens[i];
1621   }
1622   if (type == MATMPIAIJ) {
1623     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1624   }
1625   else if (type == MATMPIROW) {
1626     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1627   }
1628   A = *newmat;
1629   MatSetOption(A,COLUMNS_SORTED);
1630   for ( i=0; i<m; i++ ) {
1631     ourlens[i] += offlens[i];
1632   }
1633 
1634   if (!rank) {
1635     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1636 
1637     /* read in my part of the matrix numerical values  */
1638     nz = procsnz[0];
1639     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1640 
1641     /* insert into matrix */
1642     jj      = rstart;
1643     smycols = mycols;
1644     svals   = vals;
1645     for ( i=0; i<m; i++ ) {
1646       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1647       smycols += ourlens[i];
1648       svals   += ourlens[i];
1649       jj++;
1650     }
1651 
1652     /* read in the other processors' parts and ship them out */
1653     for ( i=1; i<size; i++ ) {
1654       nz = procsnz[i];
1655       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1656       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1657     }
1658     PetscFree(procsnz);
1659   }
1660   else {
1661     /* receive numeric values */
1662     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1663 
1664     /* receive message of values */
1665     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1666     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1667     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1668 
1669     /* insert into matrix */
1670     jj      = rstart;
1671     smycols = mycols;
1672     svals   = vals;
1673     for ( i=0; i<m; i++ ) {
1674       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1675       smycols += ourlens[i];
1676       svals   += ourlens[i];
1677       jj++;
1678     }
1679   }
1680   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1681 
1682   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1683   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1684   return 0;
1685 }
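
/*
   A hedged usage sketch for loading a matrix with this routine.  MatLoad()
   dispatches here for the MATMPIAIJ and MATMPIROW types; the viewer-opening
   call below is assumed from the binary viewer interface of this vintage and
   may need adjusting:

       Mat    A;
       Viewer viewer;
       ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
       CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/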
1686