xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 18f449ed41991115d803c2e2c8f42c76f04cb975)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.98 1995/11/06 21:31:33 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global number of columns.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
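/* Values whose global row is owned by this processor are inserted directly into
   either the diagonal block aij->A (global column in [cstart,cend)) or the
   off-diagonal block aij->B (any other column; once the matrix has been assembled
   the column is remapped to B's compressed local numbering through colmap).
   Values destined for rows owned by other processors are cached in the stash and
   are communicated during MatAssemblyBegin/MatAssemblyEnd. */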
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
40                                int *idxn,Scalar *v,InsertMode addv)
41 {
42   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (idxn[j] >= cstart && idxn[j] < cend){
61           col = idxn[j] - cstart;
62           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
63         }
64         else {
65           if (aij->assembled) {
66             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
67             col = aij->colmap[idxn[j]] + shift;
68             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
69               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
70               col =  idxn[j];
71             }
72           }
73           else col = idxn[j];
74           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
75         }
76       }
77     }
78     else {
79       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
80     }
81   }
82   return 0;
83 }
84 
85 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
86 {
87   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
88   MPI_Comm    comm = mat->comm;
89   int         size = aij->size, *owners = aij->rowners;
90   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
91   MPI_Request *send_waits,*recv_waits;
92   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
93   InsertMode  addv;
94   Scalar      *rvalues,*svalues;
95 
96   /* make sure all processors are using the same insert mode: either INSERT_VALUES or ADD_VALUES */
97   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
98   if (addv == (ADD_VALUES|INSERT_VALUES)) {
99     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
100   }
101   aij->insertmode = addv; /* in case this processor had no cache */
102 
103   /*  first count number of contributors to each processor */
104   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
105   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
106   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
107   for ( i=0; i<aij->stash.n; i++ ) {
108     idx = aij->stash.idx[i];
109     for ( j=0; j<size; j++ ) {
110       if (idx >= owners[j] && idx < owners[j+1]) {
111         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
112       }
113     }
114   }
115   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
116 
117   /* inform other processors of number of messages and max length*/
118   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
119   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
120   nreceives = work[rank];
121   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
122   nmax = work[rank];
123   PetscFree(work);
124 
125   /* post receives:
126        1) each message will consist of ordered triples
127      (global row index, global column index, value); we store the indices
128      as Scalars to simplify the message passing.
129        2) since we don't know how long each individual message is we
130      allocate the largest needed buffer for each receive.  Potentially
131      this is a lot of wasted space.
132 
133 
134        This could be done better.
135   */
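  /* Packing layout used below (matches the loops that fill svalues): the k-th
     entry of a message occupies three consecutive Scalars,
         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] = the numerical value,
     so a message carrying nz stashed entries is 3*nz Scalars long and each
     receive buffer is sized for the worst case of 3*nmax Scalars. */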
136   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
137   CHKPTRQ(rvalues);
138   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
139   CHKPTRQ(recv_waits);
140   for ( i=0; i<nreceives; i++ ) {
141     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
142               comm,recv_waits+i);
143   }
144 
145   /* do sends:
146       1) starts[i] gives the starting index in svalues for stuff going to
147          the ith processor
148   */
149   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
150   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
151   CHKPTRQ(send_waits);
152   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
153   starts[0] = 0;
154   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
155   for ( i=0; i<aij->stash.n; i++ ) {
156     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
157     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
158     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
159   }
160   PetscFree(owner);
161   starts[0] = 0;
162   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
163   count = 0;
164   for ( i=0; i<size; i++ ) {
165     if (procs[i]) {
166       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
167                 comm,send_waits+count++);
168     }
169   }
170   PetscFree(starts); PetscFree(nprocs);
171 
172   /* Free cache space */
173   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
174 
175   aij->svalues    = svalues;    aij->rvalues    = rvalues;
176   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
177   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
178   aij->rmax       = nmax;
179 
180   return 0;
181 }
182 extern int MatSetUpMultiply_MPIAIJ(Mat);
183 
184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
187   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
188   MPI_Status  *send_status,recv_status;
189   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
190   int         row,col,other_disassembled,shift = C->indexshift;
191   Scalar      *values,val;
192   InsertMode  addv = aij->insertmode;
193 
194   /*  wait on receives */
195   while (count) {
196     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
197     /* unpack receives into our local space */
198     values = aij->rvalues + 3*imdex*aij->rmax;
199     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
200     n = n/3;
201     for ( i=0; i<n; i++ ) {
202       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
203       col = (int) PETSCREAL(values[3*i+1]);
204       val = values[3*i+2];
205       if (col >= aij->cstart && col < aij->cend) {
206         col -= aij->cstart;
207         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
208       }
209       else {
210         if (aij->assembled) {
211           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
212           col = aij->colmap[col] + shift;
213           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
214             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
215             col = (int) PETSCREAL(values[3*i+1]);
216           }
217         }
218         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
219       }
220     }
221     count--;
222   }
223   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
224 
225   /* wait on sends */
226   if (aij->nsends) {
227     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
228     CHKPTRQ(send_status);
229     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
230     PetscFree(send_status);
231   }
232   PetscFree(aij->send_waits); PetscFree(aij->svalues);
233 
234   aij->insertmode = NOT_SET_VALUES;
235   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
236   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
237 
238   /* determine if any processor has disassembled; if so, we must also
239      disassemble ourselves, so that everyone can reassemble together. */
240   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
241   if (aij->assembled && !other_disassembled) {
242     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
243   }
244 
245   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
246     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
247     aij->assembled = 1;
248   }
249   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
250   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
251 
252   return 0;
253 }
254 
255 static int MatZeroEntries_MPIAIJ(Mat A)
256 {
257   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
258   int        ierr;
259   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
260   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
261   return 0;
262 }
263 
264 /* the code does not do the diagonal entries correctly unless the
265    matrix is square and the column and row ownerships are identical.
266    This is a BUG. The only way to fix it seems to be to access
267    aij->A and aij->B directly and not through the MatZeroRows()
268    routine.
269 */
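/* Overall flow of MatZeroRows_MPIAIJ: each processor determines which processor
   owns each row index it was handed, ships those row numbers to their owners
   (the two Allreduces below let every processor know how many messages to expect
   and how long the longest one is), and finally zeroes the rows it received in
   its own diagonal (l->A) and off-diagonal (l->B) blocks. */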
270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
271 {
272   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
273   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
274   int            *procs,*nprocs,j,found,idx,nsends,*work;
275   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
276   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
277   int            *lens,imdex,*lrows,*values;
278   MPI_Comm       comm = A->comm;
279   MPI_Request    *send_waits,*recv_waits;
280   MPI_Status     recv_status,*send_status;
281   IS             istmp;
282 
283   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
284   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
285   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
286 
287   /*  first count number of contributors to each processor */
288   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
289   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
290   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
291   for ( i=0; i<N; i++ ) {
292     idx = rows[i];
293     found = 0;
294     for ( j=0; j<size; j++ ) {
295       if (idx >= owners[j] && idx < owners[j+1]) {
296         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
297       }
298     }
299     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
300   }
301   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
302 
303   /* inform other processors of number of messages and max length*/
304   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
305   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
306   nrecvs = work[rank];
307   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
308   nmax = work[rank];
309   PetscFree(work);
310 
311   /* post receives:   */
312   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
313   CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
315   CHKPTRQ(recv_waits);
316   for ( i=0; i<nrecvs; i++ ) {
317     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
318   }
319 
320   /* do sends:
321       1) starts[i] gives the starting index in svalues for stuff going to
322          the ith processor
323   */
324   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
325   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
326   CHKPTRQ(send_waits);
327   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
328   starts[0] = 0;
329   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
330   for ( i=0; i<N; i++ ) {
331     svalues[starts[owner[i]]++] = rows[i];
332   }
333   ISRestoreIndices(is,&rows);
334 
335   starts[0] = 0;
336   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
337   count = 0;
338   for ( i=0; i<size; i++ ) {
339     if (procs[i]) {
340       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
341     }
342   }
343   PetscFree(starts);
344 
345   base = owners[rank];
346 
347   /*  wait on receives */
348   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
349   source = lens + nrecvs;
350   count  = nrecvs; slen = 0;
351   while (count) {
352     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
353     /* unpack receives into our local space */
354     MPI_Get_count(&recv_status,MPI_INT,&n);
355     source[imdex]  = recv_status.MPI_SOURCE;
356     lens[imdex]  = n;
357     slen += n;
358     count--;
359   }
360   PetscFree(recv_waits);
361 
362   /* move the received data into a list of local row numbers */
363   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
364   count = 0;
365   for ( i=0; i<nrecvs; i++ ) {
366     values = rvalues + i*nmax;
367     for ( j=0; j<lens[i]; j++ ) {
368       lrows[count++] = values[j] - base;
369     }
370   }
371   PetscFree(rvalues); PetscFree(lens);
372   PetscFree(owner); PetscFree(nprocs);
373 
374   /* actually zap the local rows */
375   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
376   PLogObjectParent(A,istmp);
377   PetscFree(lrows);
378   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
379   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
380   ierr = ISDestroy(istmp); CHKERRQ(ierr);
381 
382   /* wait on sends */
383   if (nsends) {
384     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
385     CHKPTRQ(send_status);
386     MPI_Waitall(nsends,send_waits,send_status);
387     PetscFree(send_status);
388   }
389   PetscFree(send_waits); PetscFree(svalues);
390 
391   return 0;
392 }
393 
394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
395 {
396   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
397   int        ierr;
398 
399   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
400   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
402   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
403   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
404   return 0;
405 }
406 
407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
410   int        ierr;
411   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
412   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
416   return 0;
417 }
418 
419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
422   int        ierr;
423 
424   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
425   /* do nondiagonal part */
426   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
427   /* send it on its way */
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
429                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
430   /* do local part */
431   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
432   /* receive remote parts: note this assumes the values are not actually */
433   /* inserted in yy until the next line, which is true for my implementation*/
434   /* but may not always be true. */
435   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
436                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
437   return 0;
438 }
439 
440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
443   int        ierr;
444 
445   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
450                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
451   /* do local part */
452   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* inserted in zz until the next line, which is true for my implementation*/
455   /* but may not always be true. */
456   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
457                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
458   return 0;
459 }
460 
461 /*
462   This only works correctly for square matrices where the subblock aij->A is the
463    diagonal block.
464 */
465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
466 {
467   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
468   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
469   return MatGetDiagonal(a->A,v);
470 }
471 
472 static int MatDestroy_MPIAIJ(PetscObject obj)
473 {
474   Mat        mat = (Mat) obj;
475   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
476   int        ierr;
477 #if defined(PETSC_LOG)
478   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
479 #endif
480   PetscFree(aij->rowners);
481   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
482   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
483   if (aij->colmap) PetscFree(aij->colmap);
484   if (aij->garray) PetscFree(aij->garray);
485   if (aij->lvec)   VecDestroy(aij->lvec);
486   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
487   PetscFree(aij);
488   PLogObjectDestroy(mat);
489   PetscHeaderDestroy(mat);
490   return 0;
491 }
492 #include "draw.h"
493 #include "pinclude/pviewer.h"
494 
495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
496 {
497   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
498   int         ierr;
499 
500   if (aij->size == 1) {
501     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
502   }
503   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
504   return 0;
505 }
506 
507 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
508 {
509   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
510   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
511   int         ierr, format,shift = C->indexshift,rank;
512   PetscObject vobj = (PetscObject) viewer;
513   FILE        *fd;
514 
515   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
516     ierr = ViewerFileGetFormat_Private(viewer,&format);
517     if (format == FILE_FORMAT_INFO_DETAILED) {
518       int nz,nzalloc,mem;
519       MPI_Comm_rank(mat->comm,&rank);
520       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
521       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
522       MPIU_Seq_begin(mat->comm,1);
523       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
524                 nzalloc,mem);
525       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
526       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
527       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
528       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
529       fflush(fd);
530       MPIU_Seq_end(mat->comm,1);
531       ierr = VecScatterView(aij->Mvctx,viewer);
532       return 0;
533     }
534     else if (format == FILE_FORMAT_INFO) {
535       return 0;
536     }
537   }
538 
539   if (vobj->type == ASCII_FILE_VIEWER) {
540     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
541     MPIU_Seq_begin(mat->comm,1);
542     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
543            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
544            aij->cend);
545     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
546     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
547     fflush(fd);
548     MPIU_Seq_end(mat->comm,1);
549   }
550   else {
551     int size = aij->size;
552     rank = aij->rank;
553     if (size == 1) {
554       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
555     }
556     else {
557       /* assemble the entire matrix onto first processor. */
558       Mat         A;
559       Mat_SeqAIJ *Aloc;
560       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
561       Scalar      *a;
562 
563       if (!rank) {
564         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
565       }
566       else {
567         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
568       }
569       PLogObjectParent(mat,A);
570 
571 
572       /* copy over the A part */
573       Aloc = (Mat_SeqAIJ*) aij->A->data;
574       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
575       row = aij->rstart;
576       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
577       for ( i=0; i<m; i++ ) {
578         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
579         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
580       }
581       aj = Aloc->j;
582       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
583 
584       /* copy over the B part */
585       Aloc = (Mat_SeqAIJ*) aij->B->data;
586       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
587       row = aij->rstart;
588       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
589       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
590       for ( i=0; i<m; i++ ) {
591         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
592         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
593       }
594       PetscFree(ct);
595       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
596       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
597       if (!rank) {
598         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
599       }
600       ierr = MatDestroy(A); CHKERRQ(ierr);
601     }
602   }
603   return 0;
604 }
605 
606 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
607 {
608   Mat         mat = (Mat) obj;
609   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
610   int         ierr;
611   PetscObject vobj = (PetscObject) viewer;
612 
613   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
614   if (!viewer) {
615     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
616   }
617   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
618   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
619     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
620   }
621   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
622     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
623   }
624   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
625     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
626   }
627   else if (vobj->cookie == DRAW_COOKIE) {
628     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
629   }
630   else if (vobj->type == BINARY_FILE_VIEWER) {
631     return MatView_MPIAIJ_Binary(mat,viewer);
632   }
633   return 0;
634 }
635 
636 extern int MatMarkDiag_SeqAIJ(Mat);
637 /*
638     This has to provide several versions.
639 
640      1) the same as the sequential version
641      2) a) use only local smoothing, updating outer values only once.
642         b) local smoothing, updating outer values each inner iteration
643      3) coloring, updating outer values between colors.
644 */
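/* What the code below currently provides: Eisenstat's trick (SOR_EISENSTAT),
   true parallel forward/backward/symmetric sweeps built on the vector pipeline
   primitives, and the purely local SOR_LOCAL_* sweeps, which refresh the
   off-processor values in lvec only once per call. */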
645 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
646                            double fshift,int its,Vec xx)
647 {
648   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
649   Mat        AA = mat->A, BB = mat->B;
650   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
651   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
652   int        ierr,*idx, *diag;
653   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
654   Vec        tt;
655 
656   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
657 
658   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
659   xs = x + shift; /* shift by one for index start of 1 */
660   ls = ls + shift;
661   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
662   diag = A->diag;
663   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
664     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
665   }
666   if (flag & SOR_EISENSTAT) {
667     /* Let  A = L + U + D, where L is lower triangular,
668     U is upper triangular, and D is diagonal.  This routine applies
669 
670             (L + E)^{-1} A (U + E)^{-1}
671 
672     to a vector efficiently using Eisenstat's trick. This is for
673     the case of SSOR preconditioner, so E is D/omega where omega
674     is the relaxation factor.
675     */
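    /* The algebra behind the three stages coded below (E = D/omega as above):
       with z = (E + U)^{-1} b and A = L + D + U,
           A z = (L + E) z + (U + E) z - (2E - D) z,
       hence
           (L + E)^{-1} A (U + E)^{-1} b = z + (L + E)^{-1} ( b - (2E - D) z ).
       The code therefore computes
           x = (E + U)^{-1} b,   t = (E + L)^{-1} ( b - (2E - D) x ),   x = x + t,
       using only two triangular sweeps and one diagonal scaling per application. */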
676     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
677     PLogObjectParent(matin,tt);
678     VecGetArray(tt,&t);
679     scale = (2.0/omega) - 1.0;
680     /*  x = (E + U)^{-1} b */
681     VecSet(&zero,mat->lvec);
682     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
683                               mat->Mvctx); CHKERRQ(ierr);
684     for ( i=m-1; i>-1; i-- ) {
685       n    = A->i[i+1] - diag[i] - 1;
686       idx  = A->j + diag[i] + !shift;
687       v    = A->a + diag[i] + !shift;
688       sum  = b[i];
689       SPARSEDENSEMDOT(sum,xs,v,idx,n);
690       d    = fshift + A->a[diag[i]+shift];
691       n    = B->i[i+1] - B->i[i];
692       idx  = B->j + B->i[i] + shift;
693       v    = B->a + B->i[i] + shift;
694       SPARSEDENSEMDOT(sum,ls,v,idx,n);
695       x[i] = omega*(sum/d);
696     }
697     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
698                             mat->Mvctx); CHKERRQ(ierr);
699 
700     /*  t = b - (2*E - D)x */
701     v = A->a;
702     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
703 
704     /*  t = (E + L)^{-1}t */
705     ts = t + shift; /* shifted by one for index start of a or mat->j*/
706     diag = A->diag;
707     VecSet(&zero,mat->lvec);
708     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
709                                                  mat->Mvctx); CHKERRQ(ierr);
710     for ( i=0; i<m; i++ ) {
711       n    = diag[i] - A->i[i];
712       idx  = A->j + A->i[i] + shift;
713       v    = A->a + A->i[i] + shift;
714       sum  = t[i];
715       SPARSEDENSEMDOT(sum,ts,v,idx,n);
716       d    = fshift + A->a[diag[i]+shift];
717       n    = B->i[i+1] - B->i[i];
718       idx  = B->j + B->i[i] + shift;
719       v    = B->a + B->i[i] + shift;
720       SPARSEDENSEMDOT(sum,ls,v,idx,n);
721       t[i] = omega*(sum/d);
722     }
723     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
724                                                     mat->Mvctx); CHKERRQ(ierr);
725     /*  x = x + t */
726     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
727     VecDestroy(tt);
728     return 0;
729   }
730 
731 
732   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
733     if (flag & SOR_ZERO_INITIAL_GUESS) {
734       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
735     }
736     else {
737       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
738       CHKERRQ(ierr);
739       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
740       CHKERRQ(ierr);
741     }
742     while (its--) {
743       /* go down through the rows */
744       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
745                               mat->Mvctx); CHKERRQ(ierr);
746       for ( i=0; i<m; i++ ) {
747         n    = A->i[i+1] - A->i[i];
748         idx  = A->j + A->i[i] + shift;
749         v    = A->a + A->i[i] + shift;
750         sum  = b[i];
751         SPARSEDENSEMDOT(sum,xs,v,idx,n);
752         d    = fshift + A->a[diag[i]+shift];
753         n    = B->i[i+1] - B->i[i];
754         idx  = B->j + B->i[i] + shift;
755         v    = B->a + B->i[i] + shift;
756         SPARSEDENSEMDOT(sum,ls,v,idx,n);
757         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
758       }
759       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
760                             mat->Mvctx); CHKERRQ(ierr);
761       /* come up through the rows */
762       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
763                               mat->Mvctx); CHKERRQ(ierr);
764       for ( i=m-1; i>-1; i-- ) {
765         n    = A->i[i+1] - A->i[i];
766         idx  = A->j + A->i[i] + shift;
767         v    = A->a + A->i[i] + shift;
768         sum  = b[i];
769         SPARSEDENSEMDOT(sum,xs,v,idx,n);
770         d    = fshift + A->a[diag[i]+shift];
771         n    = B->i[i+1] - B->i[i];
772         idx  = B->j + B->i[i] + shift;
773         v    = B->a + B->i[i] + shift;
774         SPARSEDENSEMDOT(sum,ls,v,idx,n);
775         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
776       }
777       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
778                             mat->Mvctx); CHKERRQ(ierr);
779     }
780   }
781   else if (flag & SOR_FORWARD_SWEEP){
782     if (flag & SOR_ZERO_INITIAL_GUESS) {
783       VecSet(&zero,mat->lvec);
784       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
785                               mat->Mvctx); CHKERRQ(ierr);
786       for ( i=0; i<m; i++ ) {
787         n    = diag[i] - A->i[i];
788         idx  = A->j + A->i[i] + shift;
789         v    = A->a + A->i[i] + shift;
790         sum  = b[i];
791         SPARSEDENSEMDOT(sum,xs,v,idx,n);
792         d    = fshift + A->a[diag[i]+shift];
793         n    = B->i[i+1] - B->i[i];
794         idx  = B->j + B->i[i] + shift;
795         v    = B->a + B->i[i] + shift;
796         SPARSEDENSEMDOT(sum,ls,v,idx,n);
797         x[i] = omega*(sum/d);
798       }
799       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
800                             mat->Mvctx); CHKERRQ(ierr);
801       its--;
802     }
803     while (its--) {
804       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
805       CHKERRQ(ierr);
806       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
807       CHKERRQ(ierr);
808       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
809                               mat->Mvctx); CHKERRQ(ierr);
810       for ( i=0; i<m; i++ ) {
811         n    = A->i[i+1] - A->i[i];
812         idx  = A->j + A->i[i] + shift;
813         v    = A->a + A->i[i] + shift;
814         sum  = b[i];
815         SPARSEDENSEMDOT(sum,xs,v,idx,n);
816         d    = fshift + A->a[diag[i]+shift];
817         n    = B->i[i+1] - B->i[i];
818         idx  = B->j + B->i[i] + shift;
819         v    = B->a + B->i[i] + shift;
820         SPARSEDENSEMDOT(sum,ls,v,idx,n);
821         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
822       }
823       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
824                             mat->Mvctx); CHKERRQ(ierr);
825     }
826   }
827   else if (flag & SOR_BACKWARD_SWEEP){
828     if (flag & SOR_ZERO_INITIAL_GUESS) {
829       VecSet(&zero,mat->lvec);
830       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
831                               mat->Mvctx); CHKERRQ(ierr);
832       for ( i=m-1; i>-1; i-- ) {
833         n    = A->i[i+1] - diag[i] - 1;
834         idx  = A->j + diag[i] + !shift;
835         v    = A->a + diag[i] + !shift;
836         sum  = b[i];
837         SPARSEDENSEMDOT(sum,xs,v,idx,n);
838         d    = fshift + A->a[diag[i]+shift];
839         n    = B->i[i+1] - B->i[i];
840         idx  = B->j + B->i[i] + shift;
841         v    = B->a + B->i[i] + shift;
842         SPARSEDENSEMDOT(sum,ls,v,idx,n);
843         x[i] = omega*(sum/d);
844       }
845       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
846                             mat->Mvctx); CHKERRQ(ierr);
847       its--;
848     }
849     while (its--) {
850       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
851                             mat->Mvctx); CHKERRQ(ierr);
852       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
853                             mat->Mvctx); CHKERRQ(ierr);
854       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
855                               mat->Mvctx); CHKERRQ(ierr);
856       for ( i=m-1; i>-1; i-- ) {
857         n    = A->i[i+1] - A->i[i];
858         idx  = A->j + A->i[i] + shift;
859         v    = A->a + A->i[i] + shift;
860         sum  = b[i];
861         SPARSEDENSEMDOT(sum,xs,v,idx,n);
862         d    = fshift + A->a[diag[i]+shift];
863         n    = B->i[i+1] - B->i[i];
864         idx  = B->j + B->i[i] + shift;
865         v    = B->a + B->i[i] + shift;
866         SPARSEDENSEMDOT(sum,ls,v,idx,n);
867         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
868       }
869       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
870                             mat->Mvctx); CHKERRQ(ierr);
871     }
872   }
873   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
876     }
877     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
878     CHKERRQ(ierr);
879     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
880     CHKERRQ(ierr);
881     while (its--) {
882       /* go down through the rows */
883       for ( i=0; i<m; i++ ) {
884         n    = A->i[i+1] - A->i[i];
885         idx  = A->j + A->i[i] + shift;
886         v    = A->a + A->i[i] + shift;
887         sum  = b[i];
888         SPARSEDENSEMDOT(sum,xs,v,idx,n);
889         d    = fshift + A->a[diag[i]+shift];
890         n    = B->i[i+1] - B->i[i];
891         idx  = B->j + B->i[i] + shift;
892         v    = B->a + B->i[i] + shift;
893         SPARSEDENSEMDOT(sum,ls,v,idx,n);
894         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
895       }
896       /* come up through the rows */
897       for ( i=m-1; i>-1; i-- ) {
898         n    = A->i[i+1] - A->i[i];
899         idx  = A->j + A->i[i] + shift;
900         v    = A->a + A->i[i] + shift;
901         sum  = b[i];
902         SPARSEDENSEMDOT(sum,xs,v,idx,n);
903         d    = fshift + A->a[diag[i]+shift];
904         n    = B->i[i+1] - B->i[i];
905         idx  = B->j + B->i[i] + shift;
906         v    = B->a + B->i[i] + shift;
907         SPARSEDENSEMDOT(sum,ls,v,idx,n);
908         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
909       }
910     }
911   }
912   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
913     if (flag & SOR_ZERO_INITIAL_GUESS) {
914       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
915     }
916     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
917     CHKERRQ(ierr);
918     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
919     CHKERRQ(ierr);
920     while (its--) {
921       for ( i=0; i<m; i++ ) {
922         n    = A->i[i+1] - A->i[i];
923         idx  = A->j + A->i[i] + shift;
924         v    = A->a + A->i[i] + shift;
925         sum  = b[i];
926         SPARSEDENSEMDOT(sum,xs,v,idx,n);
927         d    = fshift + A->a[diag[i]+shift];
928         n    = B->i[i+1] - B->i[i];
929         idx  = B->j + B->i[i] + shift;
930         v    = B->a + B->i[i] + shift;
931         SPARSEDENSEMDOT(sum,ls,v,idx,n);
932         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
933       }
934     }
935   }
936   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
937     if (flag & SOR_ZERO_INITIAL_GUESS) {
938       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
939     }
940     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
941                             mat->Mvctx); CHKERRQ(ierr);
942     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
943                             mat->Mvctx); CHKERRQ(ierr);
944     while (its--) {
945       for ( i=m-1; i>-1; i-- ) {
946         n    = A->i[i+1] - A->i[i];
947         idx  = A->j + A->i[i] + shift;
948         v    = A->a + A->i[i] + shift;
949         sum  = b[i];
950         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951         d    = fshift + A->a[diag[i]+shift];
952         n    = B->i[i+1] - B->i[i];
953         idx  = B->j + B->i[i] + shift;
954         v    = B->a + B->i[i] + shift;
955         SPARSEDENSEMDOT(sum,ls,v,idx,n);
956         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
957       }
958     }
959   }
960   return 0;
961 }
962 
963 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
964                              int *nzalloc,int *mem)
965 {
966   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
967   Mat        A = mat->A, B = mat->B;
968   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
969 
970   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
971   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
972   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
973   if (flag == MAT_LOCAL) {
974     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
975   } else if (flag == MAT_GLOBAL_MAX) {
976     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
977     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
978   } else if (flag == MAT_GLOBAL_SUM) {
979     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
980     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
981   }
982   return 0;
983 }
984 
985 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
986 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
987 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
988 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
989 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
990 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
991 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
992 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
993 
994 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
995 {
996   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
997 
998   if (op == NO_NEW_NONZERO_LOCATIONS ||
999       op == YES_NEW_NONZERO_LOCATIONS ||
1000       op == COLUMNS_SORTED ||
1001       op == ROW_ORIENTED) {
1002         MatSetOption(a->A,op);
1003         MatSetOption(a->B,op);
1004   }
1005   else if (op == ROWS_SORTED ||
1006            op == SYMMETRIC_MATRIX ||
1007            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1008            op == YES_NEW_DIAGONALS)
1009     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1010   else if (op == COLUMN_ORIENTED)
1011     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
1012   else if (op == NO_NEW_DIAGONALS)
1013     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1014   else
1015     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1016   return 0;
1017 }
1018 
1019 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1020 {
1021   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1022   *m = mat->M; *n = mat->N;
1023   return 0;
1024 }
1025 
1026 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1027 {
1028   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1029   *m = mat->m; *n = mat->n;
1030   return 0;
1031 }
1032 
1033 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1034 {
1035   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1036   *m = mat->rstart; *n = mat->rend;
1037   return 0;
1038 }
1039 
1040 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1041 {
1042   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1043   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1044   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1045   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1046 
1047   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
1048   lrow = row - rstart;
1049 
1050   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1051   if (!v)   {pvA = 0; pvB = 0;}
1052   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1053   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1054   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1055   nztot = nzA + nzB;
1056 
1057   if (v  || idx) {
1058     if (nztot) {
1059       /* Sort by increasing column numbers, assuming A and B already sorted */
1060       int imark;
1061       if (mat->assembled) {
1062         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1063       }
1064       if (v) {
1065         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1066         for ( i=0; i<nzB; i++ ) {
1067           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1068           else break;
1069         }
1070         imark = i;
1071         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1072         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1073       }
1074       if (idx) {
1075         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1076         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1077         for ( i=0; i<nzB; i++ ) {
1078           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1079           else break;
1080         }
1081         imark = i;
1082         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1083         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1084       }
1085     }
1086     else {*idx = 0; *v=0;}
1087   }
1088   *nz = nztot;
1089   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1090   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1091   return 0;
1092 }
1093 
1094 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1095 {
1096   if (idx) PetscFree(*idx);
1097   if (v) PetscFree(*v);
1098   return 0;
1099 }
1100 
1101 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1102 {
1103   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1104   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1105   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1106   double     sum = 0.0;
1107   Scalar     *v;
1108 
1109   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1110   if (aij->size == 1) {
1111     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1112   } else {
1113     if (type == NORM_FROBENIUS) {
1114       v = amat->a;
1115       for (i=0; i<amat->nz; i++ ) {
1116 #if defined(PETSC_COMPLEX)
1117         sum += real(conj(*v)*(*v)); v++;
1118 #else
1119         sum += (*v)*(*v); v++;
1120 #endif
1121       }
1122       v = bmat->a;
1123       for (i=0; i<bmat->nz; i++ ) {
1124 #if defined(PETSC_COMPLEX)
1125         sum += real(conj(*v)*(*v)); v++;
1126 #else
1127         sum += (*v)*(*v); v++;
1128 #endif
1129       }
1130       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1131       *norm = sqrt(*norm);
1132     }
1133     else if (type == NORM_1) { /* max column norm */
1134       double *tmp, *tmp2;
1135       int    *jj, *garray = aij->garray;
1136       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1137       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1138       PetscMemzero(tmp,aij->N*sizeof(double));
1139       *norm = 0.0;
1140       v = amat->a; jj = amat->j;
1141       for ( j=0; j<amat->nz; j++ ) {
1142         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1143       }
1144       v = bmat->a; jj = bmat->j;
1145       for ( j=0; j<bmat->nz; j++ ) {
1146         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1147       }
1148       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1149       for ( j=0; j<aij->N; j++ ) {
1150         if (tmp2[j] > *norm) *norm = tmp2[j];
1151       }
1152       PetscFree(tmp); PetscFree(tmp2);
1153     }
1154     else if (type == NORM_INFINITY) { /* max row norm */
1155       double ntemp = 0.0;
1156       for ( j=0; j<amat->m; j++ ) {
1157         v = amat->a + amat->i[j] + shift;
1158         sum = 0.0;
1159         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1160           sum += PetscAbsScalar(*v); v++;
1161         }
1162         v = bmat->a + bmat->i[j] + shift;
1163         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1164           sum += PetscAbsScalar(*v); v++;
1165         }
1166         if (sum > ntemp) ntemp = sum;
1167       }
1168       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1169     }
1170     else {
1171       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1172     }
1173   }
1174   return 0;
1175 }
1176 
1177 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1178 {
1179   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1180   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1181   int        ierr,shift = Aloc->indexshift;
1182   Mat        B;
1183   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1184   Scalar     *array;
1185 
1186   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1187   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1188   CHKERRQ(ierr);
1189 
1190   /* copy over the A part */
1191   Aloc = (Mat_SeqAIJ*) a->A->data;
1192   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1193   row = a->rstart;
1194   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1195   for ( i=0; i<m; i++ ) {
1196     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1197     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1198   }
1199   aj = Aloc->j;
1200   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1201 
1202   /* copy over the B part */
1203   Aloc = (Mat_SeqAIJ*) a->B->data;
1204   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1205   row = a->rstart;
1206   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1207   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1208   for ( i=0; i<m; i++ ) {
1209     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1210     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1211   }
1212   PetscFree(ct);
1213   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1214   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1215   if (matout) {
1216     *matout = B;
1217   } else {
1218     /* This isn't really an in-place transpose, but we do free the data structures from a */
1219     PetscFree(a->rowners);
1220     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1221     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1222     if (a->colmap) PetscFree(a->colmap);
1223     if (a->garray) PetscFree(a->garray);
1224     if (a->lvec) VecDestroy(a->lvec);
1225     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1226     PetscFree(a);
1227     PetscMemcpy(A,B,sizeof(struct _Mat));
1228     PetscHeaderDestroy(B);
1229   }
1230   return 0;
1231 }
1232 
1233 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1234 static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);
1235 
1236 /* -------------------------------------------------------------------*/
1237 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1238        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1239        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1240        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1241        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1242        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1243        MatLUFactor_MPIAIJ,0,
1244        MatRelax_MPIAIJ,
1245        MatTranspose_MPIAIJ,
1246        MatGetInfo_MPIAIJ,0,
1247        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1248        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1249        0,
1250        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1251        MatGetReordering_MPIAIJ,
1252        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1253        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1254        MatILUFactorSymbolic_MPIAIJ,0,
1255        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1256 
1257 /*@C
1258    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1259    (the default parallel PETSc format).
1260 
1261    Input Parameters:
1262 .  comm - MPI communicator
1263 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1264 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1265            if N is given)
1266 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1267 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1268            if n is given)
1269 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1270            (same for all local rows)
1271 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1272            or null (possibly different for each row).  You must leave room
1273            for the diagonal entry even if it is zero.
1274 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1275            submatrix (same for all local rows).
1276 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1277            submatrix or null (possibly different for each row).
1278 
1279    Output Parameter:
1280 .  newmat - the matrix
1281 
1282    Notes:
1283    The AIJ format (also called the Yale sparse matrix format or
1284    compressed row storage) is fully compatible with standard Fortran 77
1285    storage.  That is, the stored row and column indices can begin at
1286    either one (as in Fortran) or zero.  See the users manual for details.
1287 
1288    The user MUST specify either the local or global matrix dimensions
1289    (possibly both).
1290 
1291    Storage Information:
1292    For a square global matrix we define each processor's diagonal portion
1293    to be its local rows and the corresponding columns (a square submatrix);
1294    each processor's off-diagonal portion encompasses the remainder of the
1295    local matrix (a rectangular submatrix).
1296 
1297    The user can specify preallocated storage for the diagonal part of
1298    the local submatrix with either d_nz or d_nnz (not both). Set both
1299    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1300    Likewise, specify preallocated storage for the off-diagonal part of
1301    the local submatrix with o_nz or o_nnz (not both).
1302 
1303 .keywords: matrix, aij, compressed row, sparse, parallel
1304 
1305 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1306 @*/
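/* Usage sketch (illustrative only; assumes an M x N global matrix with at most
   5 nonzeros per row in the diagonal block and 2 in the off-diagonal block, and
   lets PETSc choose the row and column distribution; each processor would
   normally set entries in its own rows):

       Mat    A;
       Scalar v = 1.0;
       int    ierr, i = 0, j = 0;

       ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,0,2,0,&A); CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/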
1307 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1308                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1309 {
1310   Mat          mat;
1311   Mat_MPIAIJ   *a;
1312   int          ierr, i,sum[2],work[2];
1313 
1314   *newmat         = 0;
1315   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1316   PLogObjectCreate(mat);
1317   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1318   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1319   mat->destroy    = MatDestroy_MPIAIJ;
1320   mat->view       = MatView_MPIAIJ;
1321   mat->factor     = 0;
1322 
1323   a->insertmode = NOT_SET_VALUES;
1324   MPI_Comm_rank(comm,&a->rank);
1325   MPI_Comm_size(comm,&a->size);
1326 
1327   if (m == PETSC_DECIDE && (d_nnz || o_nnz))
1328     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1329 
1330   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1331     work[0] = m; work[1] = n;
1332     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1333     if (M == PETSC_DECIDE) M = sum[0];
1334     if (N == PETSC_DECIDE) N = sum[1];
1335   }
1336   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1337   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1338   a->m = m;
1339   a->n = n;
1340   a->N = N;
1341   a->M = M;
1342 
1343   /* build local table of row and column ownerships */
1344   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1345   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1346   a->cowners = a->rowners + a->size + 1;
1347   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1348   a->rowners[0] = 0;
1349   for ( i=2; i<=a->size; i++ ) {
1350     a->rowners[i] += a->rowners[i-1];
1351   }
1352   a->rstart = a->rowners[a->rank];
1353   a->rend   = a->rowners[a->rank+1];
1354   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1355   a->cowners[0] = 0;
1356   for ( i=2; i<=a->size; i++ ) {
1357     a->cowners[i] += a->cowners[i-1];
1358   }
1359   a->cstart = a->cowners[a->rank];
1360   a->cend   = a->cowners[a->rank+1];
1361 
1362   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1363   PLogObjectParent(mat,a->A);
1364   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1365   PLogObjectParent(mat,a->B);
1366 
1367   /* build cache for off-processor entries formed during MatSetValues */
1368   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1369   a->colmap    = 0;
1370   a->garray    = 0;
1371 
1372   /* stuff used for matrix vector multiply */
1373   a->lvec      = 0;
1374   a->Mvctx     = 0;
1375   a->assembled = 0;
1376 
1377   *newmat = mat;
1378   return 0;
1379 }
1380 
1381 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1382 {
1383   Mat        mat;
1384   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1385   int        ierr, len;
1386 
1387   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1388   *newmat       = 0;
1389   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1390   PLogObjectCreate(mat);
1391   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1392   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1393   mat->destroy  = MatDestroy_MPIAIJ;
1394   mat->view     = MatView_MPIAIJ;
1395   mat->factor   = matin->factor;
1396 
1397   a->m          = oldmat->m;
1398   a->n          = oldmat->n;
1399   a->M          = oldmat->M;
1400   a->N          = oldmat->N;
1401 
1402   a->assembled  = 1;
1403   a->rstart     = oldmat->rstart;
1404   a->rend       = oldmat->rend;
1405   a->cstart     = oldmat->cstart;
1406   a->cend       = oldmat->cend;
1407   a->size       = oldmat->size;
1408   a->rank       = oldmat->rank;
1409   a->insertmode = NOT_SET_VALUES;
1410 
1411   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1412   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1413   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1414   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1415   if (oldmat->colmap) {
1416     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1417     PLogObjectMemory(mat,(a->N)*sizeof(int));
1418     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1419   } else a->colmap = 0;
1420   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1421     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1422     PLogObjectMemory(mat,len*sizeof(int));
1423     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1424   } else a->garray = 0;
1425 
1426   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1427   PLogObjectParent(mat,a->lvec);
1428   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1429   PLogObjectParent(mat,a->Mvctx);
1430   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1431   PLogObjectParent(mat,a->A);
1432   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1433   PLogObjectParent(mat,a->B);
1434   *newmat = mat;
1435   return 0;
1436 }
1437 
1438 #include "sysio.h"
1439 
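/* Loads a matrix from a binary viewer into either MPIAIJ or MPIRow format.
   Process 0 reads the header, row lengths, column indices and numerical values
   from the file and ships each processor its share; every process then inserts
   its own rows with MatSetValues and the matrix is assembled at the end. */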
1440 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1441 {
1442   Mat          A;
1443   int          i, nz, ierr, j,rstart, rend, fd;
1444   Scalar       *vals,*svals;
1445   PetscObject  vobj = (PetscObject) bview;
1446   MPI_Comm     comm = vobj->comm;
1447   MPI_Status   status;
1448   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1449   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1450   int          tag = ((PetscObject)bview)->tag;
1451 
1452   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1453   if (!rank) {
1454     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1455     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1456     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1457   }
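  /* header[0] is the matrix cookie (checked above), header[1] the global number of
     rows, header[2] the global number of columns; header[3] is read but not used
     here (presumably the total nonzero count). */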
1458 
1459   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1460   M = header[1]; N = header[2];
1461   /* determine ownership of all rows */
1462   m = M/size + ((M % size) > rank);
1463   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1464   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1465   rowners[0] = 0;
1466   for ( i=2; i<=size; i++ ) {
1467     rowners[i] += rowners[i-1];
1468   }
1469   rstart = rowners[rank];
1470   rend   = rowners[rank+1];
1471 
1472   /* distribute row lengths to all processors */
1473   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1474   offlens = ourlens + (rend-rstart);
1475   if (!rank) {
1476     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1477     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1478     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1479     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1480     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1481     PetscFree(sndcounts);
1482   }
1483   else {
1484     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1485   }
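  /* ourlens[] now holds the total number of nonzeros in each locally owned row;
     offlens[] (the second half of the same allocation) is filled in further below
     with the per-row count of off-diagonal entries. */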
1486 
1487   if (!rank) {
1488     /* calculate the number of nonzeros on each processor */
1489     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1490     PetscMemzero(procsnz,size*sizeof(int));
1491     for ( i=0; i<size; i++ ) {
1492       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1493         procsnz[i] += rowlengths[j];
1494       }
1495     }
1496     PetscFree(rowlengths);
1497 
1498     /* determine max buffer needed and allocate it */
1499     maxnz = 0;
1500     for ( i=0; i<size; i++ ) {
1501       maxnz = PetscMax(maxnz,procsnz[i]);
1502     }
1503     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1504 
1505     /* read in my part of the matrix column indices */
1506     nz = procsnz[0];
1507     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1508     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1509 
1510     /* read in everyone else's portion and ship it off */
1511     for ( i=1; i<size; i++ ) {
1512       nz = procsnz[i];
1513       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1514       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1515     }
1516     PetscFree(cols);
1517   }
1518   else {
1519     /* determine buffer space needed for message */
1520     nz = 0;
1521     for ( i=0; i<m; i++ ) {
1522       nz += ourlens[i];
1523     }
1524     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1525 
1526     /* receive message of column indices */
1527     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1528     MPI_Get_count(&status,MPI_INT,&maxnz);
1529     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1530   }
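  /* at this point every process holds, in mycols, the column indices of its own
     rows, packed one row after another in file order */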
1531 
1532   /* loop over local rows, determining the number of off-diagonal entries in each */
1533   PetscMemzero(offlens,m*sizeof(int));
1534   jj = 0;
1535   for ( i=0; i<m; i++ ) {
1536     for ( j=0; j<ourlens[i]; j++ ) {
1537       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1538       jj++;
1539     }
1540   }
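  /* An entry is counted as off-diagonal when its column lies outside this process's
     row range [rstart,rend); for a square matrix with the default layout this
     coincides with the diagonal block's column range, and the resulting counts are
     used only as preallocation hints below. */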
1541 
1542   /* create our matrix */
1543   for ( i=0; i<m; i++ ) {
1544     ourlens[i] -= offlens[i];
1545   }
1546   if (type == MATMPIAIJ) {
1547     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1548   }
1549   else if (type == MATMPIROW) {
1550     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1551   }
1552   A = *newmat;
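  /* the file is assumed to store each row's column indices in ascending order;
     COLUMNS_SORTED lets the insertion routines take advantage of this */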
1553   MatSetOption(A,COLUMNS_SORTED);
1554   for ( i=0; i<m; i++ ) {
1555     ourlens[i] += offlens[i];
1556   }
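  /* restore ourlens to the full row lengths (diagonal plus off-diagonal entries)
     so it can be used as the number of entries per row in the MatSetValues calls
     below */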
1557 
1558   if (!rank) {
1559     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1560 
1561     /* read in my part of the matrix numerical values */
1562     nz = procsnz[0];
1563     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1564 
1565     /* insert into matrix */
1566     jj      = rstart;
1567     smycols = mycols;
1568     svals   = vals;
1569     for ( i=0; i<m; i++ ) {
1570       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1571       smycols += ourlens[i];
1572       svals   += ourlens[i];
1573       jj++;
1574     }
1575 
1576     /* read in the other processors' values and ship them out */
1577     for ( i=1; i<size; i++ ) {
1578       nz = procsnz[i];
1579       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1580       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1581     }
1582     PetscFree(procsnz);
1583   }
1584   else {
1585     /* receive numeric values */
1586     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1587 
1588     /* receive message of numerical values */
1589     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1590     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1591     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1592 
1593     /* insert into matrix */
1594     jj      = rstart;
1595     smycols = mycols;
1596     svals   = vals;
1597     for ( i=0; i<m; i++ ) {
1598       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1599       smycols += ourlens[i];
1600       svals   += ourlens[i];
1601       jj++;
1602     }
1603   }
1604   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1605 
1606   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1607   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1608   return 0;
1609 }
1610