xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0ff6cd277b34a3a5e5f27c7d78f04ebe320870a6)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.116 1996/01/24 16:01:05 balay Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int        ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block
   of values (global indices im[], in[]).

   Locally owned rows are routed to the diagonal block (aij->A) or the
   off-diagonal block (aij->B); rows owned by other processors are cached
   in the stash and communicated during MatAssemblyBegin/End.

   Notes:
     - Inserts and adds may not be mixed between assemblies.
     - v is read row-major if aij->roworiented, else column-major.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      /* this processor owns the row */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          /* column falls in the off-diagonal block */
          if (mat->was_assembled) {
            /* translate global column to local via colmap (built lazily).
               NOTE(review): the "not present" test via col < 0 relies on
               zeroed colmap slots mapping below zero after adding shift;
               this assumes indexshift == -1 -- confirm for shift == 0. */
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new nonzero location: fall back to global numbering and
                 disassemble so the pattern can grow */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: stash for communication during assembly */
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
94 
95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96 {
97   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100   int        cstart = aij->cstart, cend = aij->cend,row,col;
101   int        shift = C->indexshift;
102 
103   for ( i=0; i<m; i++ ) {
104     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106     if (idxm[i] >= rstart && idxm[i] < rend) {
107       row = idxm[i] - rstart;
108       for ( j=0; j<n; j++ ) {
109         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111         if (idxn[j] >= cstart && idxn[j] < cend){
112           col = idxn[j] - cstart;
113           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114         }
115         else {
116           col = aij->colmap[idxn[j]] + shift;
117           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118         }
119       }
120     }
121     else {
122       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123     }
124   }
125   return 0;
126 }
127 
/*
   MatAssemblyBegin_MPIAIJ - Begins assembly: routes every stashed
   (off-processor) entry to its owning processor.

   Protocol: two MPI_Allreduce calls tell each processor how many
   messages it will receive and the maximum possible message length;
   maximum-length receives are then posted and the stashed entries are
   sent as (row, col, value) triples with the indices encoded as Scalars
   so that a single MPI datatype covers the whole message.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of stashed entries destined for processor j;
     procs[j]  = 1 iff at least one entry goes to processor j */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* find the owning processor by scanning the row-ownership ranges */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack (row,col,value) triples; starts[] is consumed here and rebuilt
     below before the actual sends */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
225 extern int MatSetUpMultiply_MPIAIJ(Mat);
226 
227 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
228 {
229   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
230   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
231   MPI_Status  *send_status,recv_status;
232   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
233   int         row,col,other_disassembled,shift = C->indexshift;
234   Scalar      *values,val;
235   InsertMode  addv = aij->insertmode;
236 
237   /*  wait on receives */
238   while (count) {
239     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
240     /* unpack receives into our local space */
241     values = aij->rvalues + 3*imdex*aij->rmax;
242     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
243     n = n/3;
244     for ( i=0; i<n; i++ ) {
245       row = (int) PetscReal(values[3*i]) - aij->rstart;
246       col = (int) PetscReal(values[3*i+1]);
247       val = values[3*i+2];
248       if (col >= aij->cstart && col < aij->cend) {
249         col -= aij->cstart;
250         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
251       }
252       else {
253         if (mat->was_assembled) {
254           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
255           col = aij->colmap[col] + shift;
256           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
257             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
258             col = (int) PetscReal(values[3*i+1]);
259           }
260         }
261         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
262       }
263     }
264     count--;
265   }
266   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
267 
268   /* wait on sends */
269   if (aij->nsends) {
270     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
271     CHKPTRQ(send_status);
272     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
273     PetscFree(send_status);
274   }
275   PetscFree(aij->send_waits); PetscFree(aij->svalues);
276 
277   aij->insertmode = NOT_SET_VALUES;
278   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
279   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
280 
281   /* determine if any processor has disassembled, if so we must
282      also disassemble ourselfs, in order that we may reassemble. */
283   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
284   if (mat->was_assembled && !other_disassembled) {
285     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
286   }
287 
288   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
289     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
290   }
291   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
292   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
293 
294   return 0;
295 }
296 
297 static int MatZeroEntries_MPIAIJ(Mat A)
298 {
299   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
300   int        ierr;
301   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
302   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
303   return 0;
304 }
305 
306 /* the code does not do the diagonal entries correctly unless the
307    matrix is square and the column and row owerships are identical.
308    This is a BUG. The only way to fix it seems to be to access
309    aij->A and aij->B directly and not through the MatZeroRows()
310    routine.
311 */
/*
   MatZeroRows_MPIAIJ - Zeros the global rows listed in is, optionally
   placing *diag on the diagonal of the zeroed rows.

   Communication mirrors MatAssemblyBegin_MPIAIJ: each processor sends
   the row indices it does not own to their owners, gathers the indices
   it received into a local index set, then zeros its own rows via
   MatZeroRows on the A and B blocks.  (See the note above: the diagonal
   handling is only correct for square matrices with identical row and
   column ownership.)
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of rows destined for processor j;
     procs[j]  = 1 iff at least one row goes to processor j */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  /* receives are sized at the maximum message length since the actual
     per-sender counts are unknown */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices; starts[] is consumed here and rebuilt below */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  /* record the source and length of each message; slen accumulates the
     total number of local rows to zero */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  /* convert the received global row numbers to local numbering */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  /* diag goes only to the diagonal block; off-diagonal rows get 0 */
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
434 
435 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
436 {
437   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
438   int        ierr;
439 
440   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
441   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
442   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
443   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
444   return 0;
445 }
446 
447 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
448 {
449   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
450   int        ierr;
451   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
452   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
454   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
455   return 0;
456 }
457 
458 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
459 {
460   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
461   int        ierr;
462 
463   /* do nondiagonal part */
464   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
465   /* send it on its way */
466   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
467                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
468   /* do local part */
469   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
470   /* receive remote parts: note this assumes the values are not actually */
471   /* inserted in yy until the next line, which is true for my implementation*/
472   /* but is not perhaps always true. */
473   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
474                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
475   return 0;
476 }
477 
478 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
479 {
480   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
481   int        ierr;
482 
483   /* do nondiagonal part */
484   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
485   /* send it on its way */
486   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
487                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
488   /* do local part */
489   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
490   /* receive remote parts: note this assumes the values are not actually */
491   /* inserted in yy until the next line, which is true for my implementation*/
492   /* but is not perhaps always true. */
493   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
494                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495   return 0;
496 }
497 
498 /*
499   This only works correctly for square matrices where the subblock A->A is the
500    diagonal block
501 */
502 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
503 {
504   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
505   return MatGetDiagonal(a->A,v);
506 }
507 
508 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
509 {
510   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
511   int        ierr;
512   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
513   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
514   return 0;
515 }
516 
517 static int MatDestroy_MPIAIJ(PetscObject obj)
518 {
519   Mat        mat = (Mat) obj;
520   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
521   int        ierr;
522 #if defined(PETSC_LOG)
523   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
524 #endif
525   PetscFree(aij->rowners);
526   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
527   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
528   if (aij->colmap) PetscFree(aij->colmap);
529   if (aij->garray) PetscFree(aij->garray);
530   if (aij->lvec)   VecDestroy(aij->lvec);
531   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
532   PetscFree(aij);
533   PLogObjectDestroy(mat);
534   PetscHeaderDestroy(mat);
535   return 0;
536 }
537 #include "draw.h"
538 #include "pinclude/pviewer.h"
539 
540 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
541 {
542   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
543   int         ierr;
544 
545   if (aij->size == 1) {
546     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
547   }
548   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
549   return 0;
550 }
551 
/*
   MatView_MPIAIJ_ASCIIorDraworMatlab - Viewer for ASCII, draw and
   Matlab output.

   INFO formats print per-processor nonzero statistics only.  For
   ASCII_FILE output each processor prints its ownership ranges and its
   two local blocks in sequence.  For all other cases with more than one
   processor the whole matrix is gathered onto processor 0 by assembling
   a new MPIAIJ matrix whose rank-0 local part is the entire matrix,
   and that copy is viewed there.
*/
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      /* per-processor nonzero statistics, printed in rank order */
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
                nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      /* nothing extra to print in plain INFO format */
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    /* each processor prints its ranges and blocks in rank order */
    ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      /* rank 0 owns the whole matrix; everyone else owns zero rows */
      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* shift column indices to global numbering, in place */
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      /* restore the local column numbering of the A block */
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* translate to global columns in a scratch array via garray,
         leaving the B block untouched */
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* only rank 0 has data; view its (sequential) diagonal block */
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
651 
652 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
653 {
654   Mat         mat = (Mat) obj;
655   int         ierr;
656   PetscObject vobj = (PetscObject) viewer;
657 
658   if (!viewer) {
659     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
660   }
661   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
662   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
663     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
664   }
665   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
666     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
667   }
668   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
669     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
670   }
671   else if (vobj->cookie == DRAW_COOKIE) {
672     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
673   }
674   else if (vobj->type == BINARY_FILE_VIEWER) {
675     return MatView_MPIAIJ_Binary(mat,viewer);
676   }
677   return 0;
678 }
679 
680 extern int MatMarkDiag_SeqAIJ(Mat);
681 /*
682     This has to provide several versions.
683 
684      1) per sequential
685      2) a) use only local smoothing updating outer values only once.
686         b) local smoothing updating outer values each inner iteration
687      3) color updating out values betwen colors.
688 */
689 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
690                            double fshift,int its,Vec xx)
691 {
692   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
693   Mat        AA = mat->A, BB = mat->B;
694   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
695   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
696   int        ierr,*idx, *diag;
697   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
698   Vec        tt;
699 
700   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
701   xs = x + shift; /* shift by one for index start of 1 */
702   ls = ls + shift;
703   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
704   diag = A->diag;
705   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
706     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
707   }
708   if (flag & SOR_EISENSTAT) {
709     /* Let  A = L + U + D; where L is lower trianglar,
710     U is upper triangular, E is diagonal; This routine applies
711 
712             (L + E)^{-1} A (U + E)^{-1}
713 
714     to a vector efficiently using Eisenstat's trick. This is for
715     the case of SSOR preconditioner, so E is D/omega where omega
716     is the relaxation factor.
717     */
718     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
719     PLogObjectParent(matin,tt);
720     VecGetArray(tt,&t);
721     scale = (2.0/omega) - 1.0;
722     /*  x = (E + U)^{-1} b */
723     VecSet(&zero,mat->lvec);
724     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
725                               mat->Mvctx); CHKERRQ(ierr);
726     for ( i=m-1; i>-1; i-- ) {
727       n    = A->i[i+1] - diag[i] - 1;
728       idx  = A->j + diag[i] + !shift;
729       v    = A->a + diag[i] + !shift;
730       sum  = b[i];
731       SPARSEDENSEMDOT(sum,xs,v,idx,n);
732       d    = fshift + A->a[diag[i]+shift];
733       n    = B->i[i+1] - B->i[i];
734       idx  = B->j + B->i[i] + shift;
735       v    = B->a + B->i[i] + shift;
736       SPARSEDENSEMDOT(sum,ls,v,idx,n);
737       x[i] = omega*(sum/d);
738     }
739     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
740                             mat->Mvctx); CHKERRQ(ierr);
741 
742     /*  t = b - (2*E - D)x */
743     v = A->a;
744     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
745 
746     /*  t = (E + L)^{-1}t */
747     ts = t + shift; /* shifted by one for index start of a or mat->j*/
748     diag = A->diag;
749     VecSet(&zero,mat->lvec);
750     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
751                                                  mat->Mvctx); CHKERRQ(ierr);
752     for ( i=0; i<m; i++ ) {
753       n    = diag[i] - A->i[i];
754       idx  = A->j + A->i[i] + shift;
755       v    = A->a + A->i[i] + shift;
756       sum  = t[i];
757       SPARSEDENSEMDOT(sum,ts,v,idx,n);
758       d    = fshift + A->a[diag[i]+shift];
759       n    = B->i[i+1] - B->i[i];
760       idx  = B->j + B->i[i] + shift;
761       v    = B->a + B->i[i] + shift;
762       SPARSEDENSEMDOT(sum,ls,v,idx,n);
763       t[i] = omega*(sum/d);
764     }
765     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
766                                                     mat->Mvctx); CHKERRQ(ierr);
767     /*  x = x + t */
768     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
769     VecDestroy(tt);
770     return 0;
771   }
772 
773 
774   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
775     if (flag & SOR_ZERO_INITIAL_GUESS) {
776       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
777     }
778     else {
779       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
780       CHKERRQ(ierr);
781       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
782       CHKERRQ(ierr);
783     }
784     while (its--) {
785       /* go down through the rows */
786       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
787                               mat->Mvctx); CHKERRQ(ierr);
788       for ( i=0; i<m; i++ ) {
789         n    = A->i[i+1] - A->i[i];
790         idx  = A->j + A->i[i] + shift;
791         v    = A->a + A->i[i] + shift;
792         sum  = b[i];
793         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794         d    = fshift + A->a[diag[i]+shift];
795         n    = B->i[i+1] - B->i[i];
796         idx  = B->j + B->i[i] + shift;
797         v    = B->a + B->i[i] + shift;
798         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
800       }
801       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
802                             mat->Mvctx); CHKERRQ(ierr);
803       /* come up through the rows */
804       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
805                               mat->Mvctx); CHKERRQ(ierr);
806       for ( i=m-1; i>-1; i-- ) {
807         n    = A->i[i+1] - A->i[i];
808         idx  = A->j + A->i[i] + shift;
809         v    = A->a + A->i[i] + shift;
810         sum  = b[i];
811         SPARSEDENSEMDOT(sum,xs,v,idx,n);
812         d    = fshift + A->a[diag[i]+shift];
813         n    = B->i[i+1] - B->i[i];
814         idx  = B->j + B->i[i] + shift;
815         v    = B->a + B->i[i] + shift;
816         SPARSEDENSEMDOT(sum,ls,v,idx,n);
817         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
818       }
819       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
820                             mat->Mvctx); CHKERRQ(ierr);
821     }
822   }
823   else if (flag & SOR_FORWARD_SWEEP){
824     if (flag & SOR_ZERO_INITIAL_GUESS) {
825       VecSet(&zero,mat->lvec);
826       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
827                               mat->Mvctx); CHKERRQ(ierr);
828       for ( i=0; i<m; i++ ) {
829         n    = diag[i] - A->i[i];
830         idx  = A->j + A->i[i] + shift;
831         v    = A->a + A->i[i] + shift;
832         sum  = b[i];
833         SPARSEDENSEMDOT(sum,xs,v,idx,n);
834         d    = fshift + A->a[diag[i]+shift];
835         n    = B->i[i+1] - B->i[i];
836         idx  = B->j + B->i[i] + shift;
837         v    = B->a + B->i[i] + shift;
838         SPARSEDENSEMDOT(sum,ls,v,idx,n);
839         x[i] = omega*(sum/d);
840       }
841       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
842                             mat->Mvctx); CHKERRQ(ierr);
843       its--;
844     }
845     while (its--) {
846       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
847       CHKERRQ(ierr);
848       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
849       CHKERRQ(ierr);
850       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
851                               mat->Mvctx); CHKERRQ(ierr);
852       for ( i=0; i<m; i++ ) {
853         n    = A->i[i+1] - A->i[i];
854         idx  = A->j + A->i[i] + shift;
855         v    = A->a + A->i[i] + shift;
856         sum  = b[i];
857         SPARSEDENSEMDOT(sum,xs,v,idx,n);
858         d    = fshift + A->a[diag[i]+shift];
859         n    = B->i[i+1] - B->i[i];
860         idx  = B->j + B->i[i] + shift;
861         v    = B->a + B->i[i] + shift;
862         SPARSEDENSEMDOT(sum,ls,v,idx,n);
863         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
864       }
865       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
866                             mat->Mvctx); CHKERRQ(ierr);
867     }
868   }
869   else if (flag & SOR_BACKWARD_SWEEP){
870     if (flag & SOR_ZERO_INITIAL_GUESS) {
871       VecSet(&zero,mat->lvec);
872       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
873                               mat->Mvctx); CHKERRQ(ierr);
874       for ( i=m-1; i>-1; i-- ) {
875         n    = A->i[i+1] - diag[i] - 1;
876         idx  = A->j + diag[i] + !shift;
877         v    = A->a + diag[i] + !shift;
878         sum  = b[i];
879         SPARSEDENSEMDOT(sum,xs,v,idx,n);
880         d    = fshift + A->a[diag[i]+shift];
881         n    = B->i[i+1] - B->i[i];
882         idx  = B->j + B->i[i] + shift;
883         v    = B->a + B->i[i] + shift;
884         SPARSEDENSEMDOT(sum,ls,v,idx,n);
885         x[i] = omega*(sum/d);
886       }
887       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
888                             mat->Mvctx); CHKERRQ(ierr);
889       its--;
890     }
891     while (its--) {
892       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
893                             mat->Mvctx); CHKERRQ(ierr);
894       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
895                             mat->Mvctx); CHKERRQ(ierr);
896       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
897                               mat->Mvctx); CHKERRQ(ierr);
898       for ( i=m-1; i>-1; i-- ) {
899         n    = A->i[i+1] - A->i[i];
900         idx  = A->j + A->i[i] + shift;
901         v    = A->a + A->i[i] + shift;
902         sum  = b[i];
903         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904         d    = fshift + A->a[diag[i]+shift];
905         n    = B->i[i+1] - B->i[i];
906         idx  = B->j + B->i[i] + shift;
907         v    = B->a + B->i[i] + shift;
908         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
910       }
911       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
912                             mat->Mvctx); CHKERRQ(ierr);
913     }
914   }
915   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
916     if (flag & SOR_ZERO_INITIAL_GUESS) {
917       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
918     }
919     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
920     CHKERRQ(ierr);
921     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
922     CHKERRQ(ierr);
923     while (its--) {
924       /* go down through the rows */
925       for ( i=0; i<m; i++ ) {
926         n    = A->i[i+1] - A->i[i];
927         idx  = A->j + A->i[i] + shift;
928         v    = A->a + A->i[i] + shift;
929         sum  = b[i];
930         SPARSEDENSEMDOT(sum,xs,v,idx,n);
931         d    = fshift + A->a[diag[i]+shift];
932         n    = B->i[i+1] - B->i[i];
933         idx  = B->j + B->i[i] + shift;
934         v    = B->a + B->i[i] + shift;
935         SPARSEDENSEMDOT(sum,ls,v,idx,n);
936         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
937       }
938       /* come up through the rows */
939       for ( i=m-1; i>-1; i-- ) {
940         n    = A->i[i+1] - A->i[i];
941         idx  = A->j + A->i[i] + shift;
942         v    = A->a + A->i[i] + shift;
943         sum  = b[i];
944         SPARSEDENSEMDOT(sum,xs,v,idx,n);
945         d    = fshift + A->a[diag[i]+shift];
946         n    = B->i[i+1] - B->i[i];
947         idx  = B->j + B->i[i] + shift;
948         v    = B->a + B->i[i] + shift;
949         SPARSEDENSEMDOT(sum,ls,v,idx,n);
950         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
951       }
952     }
953   }
954   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
955     if (flag & SOR_ZERO_INITIAL_GUESS) {
956       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
957     }
958     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
959     CHKERRQ(ierr);
960     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
961     CHKERRQ(ierr);
962     while (its--) {
963       for ( i=0; i<m; i++ ) {
964         n    = A->i[i+1] - A->i[i];
965         idx  = A->j + A->i[i] + shift;
966         v    = A->a + A->i[i] + shift;
967         sum  = b[i];
968         SPARSEDENSEMDOT(sum,xs,v,idx,n);
969         d    = fshift + A->a[diag[i]+shift];
970         n    = B->i[i+1] - B->i[i];
971         idx  = B->j + B->i[i] + shift;
972         v    = B->a + B->i[i] + shift;
973         SPARSEDENSEMDOT(sum,ls,v,idx,n);
974         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
975       }
976     }
977   }
978   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
979     if (flag & SOR_ZERO_INITIAL_GUESS) {
980       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
981     }
982     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
983                             mat->Mvctx); CHKERRQ(ierr);
984     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
985                             mat->Mvctx); CHKERRQ(ierr);
986     while (its--) {
987       for ( i=m-1; i>-1; i-- ) {
988         n    = A->i[i+1] - A->i[i];
989         idx  = A->j + A->i[i] + shift;
990         v    = A->a + A->i[i] + shift;
991         sum  = b[i];
992         SPARSEDENSEMDOT(sum,xs,v,idx,n);
993         d    = fshift + A->a[diag[i]+shift];
994         n    = B->i[i+1] - B->i[i];
995         idx  = B->j + B->i[i] + shift;
996         v    = B->a + B->i[i] + shift;
997         SPARSEDENSEMDOT(sum,ls,v,idx,n);
998         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
999       }
1000     }
1001   }
1002   return 0;
1003 }
1004 
1005 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1006                              int *nzalloc,int *mem)
1007 {
1008   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1009   Mat        A = mat->A, B = mat->B;
1010   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1011 
1012   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1013   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1014   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1015   if (flag == MAT_LOCAL) {
1016     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1017   } else if (flag == MAT_GLOBAL_MAX) {
1018     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1019     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1020   } else if (flag == MAT_GLOBAL_SUM) {
1021     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1022     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1023   }
1024   return 0;
1025 }
1026 
1027 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1028 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1029 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1030 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1031 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1032 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1033 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1034 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1035 
1036 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1037 {
1038   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1039 
1040   if (op == NO_NEW_NONZERO_LOCATIONS ||
1041       op == YES_NEW_NONZERO_LOCATIONS ||
1042       op == COLUMNS_SORTED ||
1043       op == ROW_ORIENTED) {
1044         MatSetOption(a->A,op);
1045         MatSetOption(a->B,op);
1046   }
1047   else if (op == ROWS_SORTED ||
1048            op == SYMMETRIC_MATRIX ||
1049            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1050            op == YES_NEW_DIAGONALS)
1051     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1052   else if (op == COLUMN_ORIENTED) {
1053     a->roworiented = 0;
1054     MatSetOption(a->A,op);
1055     MatSetOption(a->B,op);
1056   }
1057   else if (op == NO_NEW_DIAGONALS)
1058     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1059   else
1060     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1061   return 0;
1062 }
1063 
1064 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1065 {
1066   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1067   *m = mat->M; *n = mat->N;
1068   return 0;
1069 }
1070 
1071 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1072 {
1073   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1074   *m = mat->m; *n = mat->N;
1075   return 0;
1076 }
1077 
1078 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1079 {
1080   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1081   *m = mat->rstart; *n = mat->rend;
1082   return 0;
1083 }
1084 
1085 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1086 {
1087   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1088   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1089   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1090   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1091 
1092   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1093   lrow = row - rstart;
1094 
1095   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1096   if (!v)   {pvA = 0; pvB = 0;}
1097   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1098   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1099   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1100   nztot = nzA + nzB;
1101 
1102   if (v  || idx) {
1103     if (nztot) {
1104       /* Sort by increasing column numbers, assuming A and B already sorted */
1105       int imark;
1106       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1107       if (v) {
1108         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1109         for ( i=0; i<nzB; i++ ) {
1110           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1111           else break;
1112         }
1113         imark = i;
1114         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1115         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1116       }
1117       if (idx) {
1118         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1119         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1120         for ( i=0; i<nzB; i++ ) {
1121           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1122           else break;
1123         }
1124         imark = i;
1125         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1126         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1127       }
1128     }
1129     else {*idx = 0; *v=0;}
1130   }
1131   *nz = nztot;
1132   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1133   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1134   return 0;
1135 }
1136 
1137 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1138 {
1139   if (idx) PetscFree(*idx);
1140   if (v) PetscFree(*v);
1141   return 0;
1142 }
1143 
/*
   Computes a norm of the parallel matrix.  Supported: NORM_FROBENIUS,
   NORM_1 (max column sum) and NORM_INFINITY (max row sum); the two norm
   is not supported.  Collective: every processor gets the same result.
   Note the NORM_1 reduction uses an O(N) temporary (N = global columns),
   which is not scalable.
*/
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    /* uniprocessor: the whole matrix is in the A block; delegate */
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local A and B blocks, then global sum */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums, reduce, take the max */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* A block: local column + cstart gives the global column */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* B block: garray maps local to global column numbers */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      /* each row is split across A and B; sum both, then global max */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}
1218 
1219 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1220 {
1221   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1222   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1223   int        ierr,shift = Aloc->indexshift;
1224   Mat        B;
1225   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1226   Scalar     *array;
1227 
1228   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1229   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1230          PETSC_NULL,&B); CHKERRQ(ierr);
1231 
1232   /* copy over the A part */
1233   Aloc = (Mat_SeqAIJ*) a->A->data;
1234   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1235   row = a->rstart;
1236   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1237   for ( i=0; i<m; i++ ) {
1238     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1239     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1240   }
1241   aj = Aloc->j;
1242   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1243 
1244   /* copy over the B part */
1245   Aloc = (Mat_SeqAIJ*) a->B->data;
1246   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1247   row = a->rstart;
1248   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1249   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1250   for ( i=0; i<m; i++ ) {
1251     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1252     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1253   }
1254   PetscFree(ct);
1255   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1256   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1257   if (matout) {
1258     *matout = B;
1259   } else {
1260     /* This isn't really an in-place transpose .... but free data structures from a */
1261     PetscFree(a->rowners);
1262     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1263     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1264     if (a->colmap) PetscFree(a->colmap);
1265     if (a->garray) PetscFree(a->garray);
1266     if (a->lvec) VecDestroy(a->lvec);
1267     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1268     PetscFree(a);
1269     PetscMemcpy(A,B,sizeof(struct _Mat));
1270     PetscHeaderDestroy(B);
1271   }
1272   return 0;
1273 }
1274 
1275 extern int MatPrintHelp_SeqAIJ(Mat);
1276 static int MatPrintHelp_MPIAIJ(Mat A)
1277 {
1278   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1279 
1280   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1281   else return 0;
1282 }
1283 
1284 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1285 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1286 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1287 /* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type.  The entries are
   positional and must match the slot order of struct _MatOps exactly;
   a 0 marks an operation this type does not support. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};
1311 
1312 /*@C
1313    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1314    (the default parallel PETSc format).
1315 
1316    Input Parameters:
1317 .  comm - MPI communicator
1318 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1319 .  n - number of local columns (or PETSC_DECIDE to have calculated
1320            if N is given)
1321 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1322 .  N - number of global columns (or PETSC_DECIDE to have calculated
1323            if n is given)
1324 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1325            (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1327            or null (possibly different for each row).  You must leave room
1328            for the diagonal entry even if it is zero.
1329 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1330            submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
1332            submatrix or null (possibly different for each row).
1333 
1334    Output Parameter:
1335 .  newmat - the matrix
1336 
1337    Notes:
1338    The AIJ format (also called the Yale sparse matrix format or
1339    compressed row storage), is fully compatible with standard Fortran 77
1340    storage.  That is, the stored row and column indices can begin at
1341    either one (as in Fortran) or zero.  See the users manual for details.
1342 
1343    The user MUST specify either the local or global matrix dimensions
1344    (possibly both).
1345 
1346    Storage Information:
1347    For a square global matrix we define each processor's diagonal portion
1348    to be its local rows and the corresponding columns (a square submatrix);
1349    each processor's off-diagonal portion encompasses the remainder of the
1350    local matrix (a rectangular submatrix).
1351 
1352    The user can specify preallocated storage for the diagonal part of
1353    the local submatrix with either d_nz or d_nnz (not both).  Set
1354    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1355    memory allocation.  Likewise, specify preallocated storage for the
1356    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1357 
1358    By default, this format uses inodes (identical nodes) when possible.
1359    We search for consecutive rows with the same nonzero structure, thereby
1360    reusing matrix information to achieve increased efficiency.
1361 
1362    Options Database Keys:
1363 $    -mat_aij_no_inode  - Do not use inodes
1364 $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1365 
1366 .keywords: matrix, aij, compressed row, sparse, parallel
1367 
1368 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1369 @*/
/*
   Creates a parallel AIJ matrix (see the manual page above).  Splits the
   matrix into a diagonal sequential block A (m x n) and an off-diagonal
   block B (m x N), and builds the row/column ownership tables.
   Collective on comm: all processors must call with consistent sizes.
*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *a;
  int          ierr, i,sum[2],work[2];

  *newmat         = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;
  mat->assembled  = PETSC_FALSE;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  /* per-row preallocation arrays only make sense if the caller fixed m */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  /* fill in any global sizes left to PETSc by summing the local ones */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* spread local sizes as evenly as possible; low ranks get the remainder */
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  /* rowners and cowners share one allocation; each needs size+1 entries */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  /* prefix-sum the gathered local row counts to get ownership offsets */
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  /* same prefix-sum construction for column ownership */
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  /* diagonal block: m x n; off-diagonal block: m x N (global columns) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix vector multiply; built lazily at assembly */
  a->lvec      = 0;
  a->Mvctx     = 0;

  *newmat = mat;
  return 0;
}
1446 
/*
   Duplicates an (assembled) MPIAIJ matrix, copying the ownership tables,
   column map, garray, local blocks A and B, the local work vector and
   the scatter context.  cpvalues is currently unused here; the A and B
   blocks are copied via MatConvert(...,MATSAME,...).
*/
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len,flg;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  /* the source is assumed assembled, so the copy is too */
  mat->assembled  = PETSC_TRUE;

  a->m          = oldmat->m;
  a->n          = oldmat->n;
  a->M          = oldmat->M;
  a->N          = oldmat->N;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  /* NOTE(review): only rowners (size+1 entries) is copied here, unlike
     MatCreateMPIAIJ() which also builds cowners -- a->cowners is never
     set on the copy; confirm it is not used after assembly */
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* colmap/garray exist only if the source built them; copy if present */
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}
1506 
1507 #include "sysio.h"
1508 
1509 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1510 {
1511   Mat          A;
1512   int          i, nz, ierr, j,rstart, rend, fd;
1513   Scalar       *vals,*svals;
1514   PetscObject  vobj = (PetscObject) bview;
1515   MPI_Comm     comm = vobj->comm;
1516   MPI_Status   status;
1517   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1518   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1519   int          tag = ((PetscObject)bview)->tag;
1520 
1521   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1522   if (!rank) {
1523     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1524     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1525     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1526   }
1527 
1528   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1529   M = header[1]; N = header[2];
1530   /* determine ownership of all rows */
1531   m = M/size + ((M % size) > rank);
1532   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1533   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1534   rowners[0] = 0;
1535   for ( i=2; i<=size; i++ ) {
1536     rowners[i] += rowners[i-1];
1537   }
1538   rstart = rowners[rank];
1539   rend   = rowners[rank+1];
1540 
1541   /* distribute row lengths to all processors */
1542   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1543   offlens = ourlens + (rend-rstart);
1544   if (!rank) {
1545     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1546     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1547     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1548     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1549     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1550     PetscFree(sndcounts);
1551   }
1552   else {
1553     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1554   }
1555 
1556   if (!rank) {
1557     /* calculate the number of nonzeros on each processor */
1558     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1559     PetscMemzero(procsnz,size*sizeof(int));
1560     for ( i=0; i<size; i++ ) {
1561       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1562         procsnz[i] += rowlengths[j];
1563       }
1564     }
1565     PetscFree(rowlengths);
1566 
1567     /* determine max buffer needed and allocate it */
1568     maxnz = 0;
1569     for ( i=0; i<size; i++ ) {
1570       maxnz = PetscMax(maxnz,procsnz[i]);
1571     }
1572     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1573 
1574     /* read in my part of the matrix column indices  */
1575     nz = procsnz[0];
1576     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1577     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1578 
1579     /* read in every one elses and ship off */
1580     for ( i=1; i<size; i++ ) {
1581       nz = procsnz[i];
1582       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1583       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1584     }
1585     PetscFree(cols);
1586   }
1587   else {
1588     /* determine buffer space needed for message */
1589     nz = 0;
1590     for ( i=0; i<m; i++ ) {
1591       nz += ourlens[i];
1592     }
1593     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1594 
1595     /* receive message of column indices*/
1596     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1597     MPI_Get_count(&status,MPI_INT,&maxnz);
1598     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1599   }
1600 
1601   /* loop over local rows, determining number of off diagonal entries */
1602   PetscMemzero(offlens,m*sizeof(int));
1603   jj = 0;
1604   for ( i=0; i<m; i++ ) {
1605     for ( j=0; j<ourlens[i]; j++ ) {
1606       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1607       jj++;
1608     }
1609   }
1610 
1611   /* create our matrix */
1612   for ( i=0; i<m; i++ ) {
1613     ourlens[i] -= offlens[i];
1614   }
1615   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1616   A = *newmat;
1617   MatSetOption(A,COLUMNS_SORTED);
1618   for ( i=0; i<m; i++ ) {
1619     ourlens[i] += offlens[i];
1620   }
1621 
1622   if (!rank) {
1623     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1624 
1625     /* read in my part of the matrix numerical values  */
1626     nz = procsnz[0];
1627     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1628 
1629     /* insert into matrix */
1630     jj      = rstart;
1631     smycols = mycols;
1632     svals   = vals;
1633     for ( i=0; i<m; i++ ) {
1634       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1635       smycols += ourlens[i];
1636       svals   += ourlens[i];
1637       jj++;
1638     }
1639 
1640     /* read in other processors and ship out */
1641     for ( i=1; i<size; i++ ) {
1642       nz = procsnz[i];
1643       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1644       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1645     }
1646     PetscFree(procsnz);
1647   }
1648   else {
1649     /* receive numeric values */
1650     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1651 
1652     /* receive message of values*/
1653     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1654     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1655     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1656 
1657     /* insert into matrix */
1658     jj      = rstart;
1659     smycols = mycols;
1660     svals   = vals;
1661     for ( i=0; i<m; i++ ) {
1662       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1663       smycols += ourlens[i];
1664       svals   += ourlens[i];
1665       jj++;
1666     }
1667   }
1668   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1669 
1670   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1672   return 0;
1673 }
1674