xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ce7ae38c9ff27d51eded9498feee4e476f85e43c)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.84 1995/10/03 18:38:30 curfman Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* Local utility routine that creates a mapping from the global column
10 numbers to the local column numbers in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way, since the
12 length of colmap equals the global number of columns.
13 */
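/*
   For example, if the off-diagonal block has three local columns whose global
   column numbers are garray = {4,17,23}, then after this routine colmap[4],
   colmap[17] and colmap[23] record the corresponding local column numbers
   0, 1 and 2 (stored as i-indexshift); all other entries of colmap remain zero.
*/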
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscZero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int ierr;
33   if (aij->numtids == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
40                                int *idxn,Scalar *v,InsertMode addv)
41 {
42   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift;
47 
48   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
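  /*
     Each (row, column) pair is handled in one of three ways below:
       - row owned locally and column in [cstart,cend): the value goes into
         the diagonal block aij->A;
       - row owned locally, column owned elsewhere: the value goes into the
         off-diagonal block aij->B (translated through colmap once the matrix
         has been assembled at least once);
       - row owned by another processor: the value is stashed and shipped off
         during MatAssemblyBegin/End.
  */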
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (idxn[j] >= cstart && idxn[j] < cend){
61           col = idxn[j] - cstart;
62           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
63         }
64         else {
65           if (aij->assembled) {
66             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
67             col = aij->colmap[idxn[j]] + shift;
68             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
69               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
70               col =  idxn[j];
71             }
72           }
73           else col = idxn[j];
74           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
75         }
76       }
77     }
78     else {
79       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
80     }
81   }
82   return 0;
83 }
84 
85 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
86 {
87   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
88   MPI_Comm    comm = mat->comm;
89   int         numtids = aij->numtids, *owners = aij->rowners;
90   int         mytid = aij->mytid,tag = mat->tag, *owner,*starts,count,ierr;
91   MPI_Request *send_waits,*recv_waits;
92   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
93   InsertMode  addv;
94   Scalar      *rvalues,*svalues;
95 
96   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture */
97   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
98   if (addv == (ADD_VALUES|INSERT_VALUES)) {
99     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
100   }
101   aij->insertmode = addv; /* in case this processor had no cache */
102 
103   /*  first count number of contributors to each processor */
104   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
105   PetscZero(nprocs,2*numtids*sizeof(int)); procs = nprocs + numtids;
106   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
107   for ( i=0; i<aij->stash.n; i++ ) {
108     idx = aij->stash.idx[i];
109     for ( j=0; j<numtids; j++ ) {
110       if (idx >= owners[j] && idx < owners[j+1]) {
111         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
112       }
113     }
114   }
115   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
116 
117   /* inform other processors of number of messages and max length*/
118   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
119   MPI_Allreduce(procs, work,numtids,MPI_INT,MPI_SUM,comm);
120   nreceives = work[mytid];
121   MPI_Allreduce( nprocs, work,numtids,MPI_INT,MPI_MAX,comm);
122   nmax = work[mytid];
123   PETSCFREE(work);
124 
125   /* post receives:
126        1) each message consists of ordered triples
127      (global row index, global column index, value); the indices are stored
128      as Scalars (doubles) to simplify the message passing.
129        2) since we don't know how long each individual message is, we
130      allocate the largest needed buffer for each receive. Potentially
131      this is a lot of wasted space.
132 
133 
134        This could be done better.
135   */
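  /*
     Sketch of the layout used below: rvalues holds nreceives blocks of
     3*nmax Scalars; the message that completes as receive number imdex
     occupies rvalues[3*nmax*imdex .. 3*nmax*imdex+3*n-1] and is unpacked
     in MatAssemblyEnd_MPIAIJ as n triples (row, column, value).
  */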
136   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
137   CHKPTRQ(rvalues);
138   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
139   CHKPTRQ(recv_waits);
140   for ( i=0; i<nreceives; i++ ) {
141     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
142               comm,recv_waits+i);
143   }
144 
145   /* do sends:
146       1) starts[i] gives the starting index in svalues for stuff going to
147          the ith processor
148   */
149   svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
150   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
151   CHKPTRQ(send_waits);
152   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
153   starts[0] = 0;
154   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
155   for ( i=0; i<aij->stash.n; i++ ) {
156     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
157     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
158     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
159   }
160   PETSCFREE(owner);
161   starts[0] = 0;
162   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
163   count = 0;
164   for ( i=0; i<numtids; i++ ) {
165     if (procs[i]) {
166       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
167                 comm,send_waits+count++);
168     }
169   }
170   PETSCFREE(starts); PETSCFREE(nprocs);
171 
172   /* Free cache space */
173   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
174 
175   aij->svalues    = svalues;    aij->rvalues    = rvalues;
176   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
177   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
178   aij->rmax       = nmax;
179 
180   return 0;
181 }
182 extern int MatSetUpMultiply_MPIAIJ(Mat);
183 
184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
187   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
188   MPI_Status  *send_status,recv_status;
189   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
190   int         row,col,other_disassembled,shift = C->indexshift;
191   Scalar      *values,val;
192   InsertMode  addv = aij->insertmode;
193 
194   /*  wait on receives */
195   while (count) {
196     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
197     /* unpack receives into our local space */
198     values = aij->rvalues + 3*imdex*aij->rmax;
199     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
200     n = n/3;
201     for ( i=0; i<n; i++ ) {
202       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
203       col = (int) PETSCREAL(values[3*i+1]);
204       val = values[3*i+2];
205       if (col >= aij->cstart && col < aij->cend) {
206         col -= aij->cstart;
207         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
208       }
209       else {
210         if (aij->assembled) {
211           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
212           col = aij->colmap[col] + shift;
213           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
214             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
215             col = (int) PETSCREAL(values[3*i+1]);
216           }
217         }
218         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
219       }
220     }
221     count--;
222   }
223   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
224 
225   /* wait on sends */
226   if (aij->nsends) {
227     send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
228     CHKPTRQ(send_status);
229     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
230     PETSCFREE(send_status);
231   }
232   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
233 
234   aij->insertmode = NOTSETVALUES;
235   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
236   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
237 
238   /* determine if any processor has disassembled; if so, we must
239      also disassemble ourselves so that we may reassemble. */
240   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
241   if (aij->assembled && !other_disassembled) {
242     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
243   }
244 
245   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
246     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
247     aij->assembled = 1;
248   }
249   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
250   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
251 
252   return 0;
253 }
254 
255 static int MatZeroEntries_MPIAIJ(Mat A)
256 {
257   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
258   int        ierr;
259   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
260   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
261   return 0;
262 }
263 
264 /* the code does not do the diagonal entries correctly unless the
265    matrix is square and the column and row ownerships are identical.
266    This is a BUG. The only way to fix it seems to be to access
267    aij->A and aij->B directly and not through the MatZeroRows()
268    routine.
269 */
270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
271 {
272   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
273   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
274   int            *procs,*nprocs,j,found,idx,nsends,*work;
275   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
276   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
277   int            *lens,imdex,*lrows,*values;
278   MPI_Comm       comm = A->comm;
279   MPI_Request    *send_waits,*recv_waits;
280   MPI_Status     recv_status,*send_status;
281   IS             istmp;
282 
283   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
284   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
285   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
286 
287   /*  first count number of contributors to each processor */
288   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
289   PetscZero(nprocs,2*numtids*sizeof(int)); procs = nprocs + numtids;
290   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
291   for ( i=0; i<N; i++ ) {
292     idx = rows[i];
293     found = 0;
294     for ( j=0; j<numtids; j++ ) {
295       if (idx >= owners[j] && idx < owners[j+1]) {
296         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
297       }
298     }
299     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
300   }
301   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
302 
303   /* inform other processors of number of messages and max length*/
304   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
305   MPI_Allreduce( procs, work,numtids,MPI_INT,MPI_SUM,comm);
306   nrecvs = work[mytid];
307   MPI_Allreduce( nprocs, work,numtids,MPI_INT,MPI_MAX,comm);
308   nmax = work[mytid];
309   PETSCFREE(work);
310 
311   /* post receives:   */
312   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
313   CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
315   CHKPTRQ(recv_waits);
316   for ( i=0; i<nrecvs; i++ ) {
317     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
318   }
319 
320   /* do sends:
321       1) starts[i] gives the starting index in svalues for stuff going to
322          the ith processor
323   */
324   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
325   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
326   CHKPTRQ(send_waits);
327   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
328   starts[0] = 0;
329   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
330   for ( i=0; i<N; i++ ) {
331     svalues[starts[owner[i]]++] = rows[i];
332   }
333   ISRestoreIndices(is,&rows);
334 
335   starts[0] = 0;
336   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
337   count = 0;
338   for ( i=0; i<numtids; i++ ) {
339     if (procs[i]) {
340       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
341     }
342   }
343   PETSCFREE(starts);
344 
345   base = owners[mytid];
346 
347   /*  wait on receives */
348   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
349   source = lens + nrecvs;
350   count  = nrecvs; slen = 0;
351   while (count) {
352     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
353     /* unpack receives into our local space */
354     MPI_Get_count(&recv_status,MPI_INT,&n);
355     source[imdex]  = recv_status.MPI_SOURCE;
356     lens[imdex]  = n;
357     slen += n;
358     count--;
359   }
360   PETSCFREE(recv_waits);
361 
362   /* move the received row indices into the local list lrows */
363   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
364   count = 0;
365   for ( i=0; i<nrecvs; i++ ) {
366     values = rvalues + i*nmax;
367     for ( j=0; j<lens[i]; j++ ) {
368       lrows[count++] = values[j] - base;
369     }
370   }
371   PETSCFREE(rvalues); PETSCFREE(lens);
372   PETSCFREE(owner); PETSCFREE(nprocs);
373 
374   /* actually zap the local rows */
375   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
376   PLogObjectParent(A,istmp);
377   PETSCFREE(lrows);
378   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
379   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
380   ierr = ISDestroy(istmp); CHKERRQ(ierr);
381 
382   /* wait on sends */
383   if (nsends) {
384     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
385     CHKPTRQ(send_status);
386     MPI_Waitall(nsends,send_waits,send_status);
387     PETSCFREE(send_status);
388   }
389   PETSCFREE(send_waits); PETSCFREE(svalues);
390 
391   return 0;
392 }
393 
394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
395 {
396   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
397   int        ierr;
398 
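  /*
     y = A x is formed as (diagonal block)*x_local + (off-diagonal block)*x_ghost:
     the scatter that gathers the needed ghost values of x into a->lvec is
     started first, the purely local product with a->A is overlapped with the
     communication, and the contribution of a->B is added once the scatter
     has completed.
  */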
399   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
400   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTERALL,a->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
402   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTERALL,a->Mvctx);CHKERRQ(ierr);
403   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
404   return 0;
405 }
406 
407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
410   int        ierr;
411   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
412   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTERALL,a->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTERALL,a->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
416   return 0;
417 }
418 
419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
422   int        ierr;
423 
424   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
425   /* do nondiagonal part */
426   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
427   /* send it on its way */
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
429                 (ScatterMode)(SCATTERALL|SCATTERREVERSE),a->Mvctx); CHKERRQ(ierr);
430   /* do local part */
431   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
432   /* receive remote parts: note this assumes the values are not actually */
433   /* added to yy until the VecScatterEnd() below, which is true for this  */
434   /* implementation but may not always be true.                           */
435   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
436                   (ScatterMode)(SCATTERALL|SCATTERREVERSE),a->Mvctx); CHKERRQ(ierr);
437   return 0;
438 }
439 
440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
443   int        ierr;
444 
445   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
450                  (ScatterMode)(SCATTERALL|SCATTERREVERSE),a->Mvctx); CHKERRQ(ierr);
451   /* do local part */
452   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* added to zz until the VecScatterEnd() below, which is true for this  */
455   /* implementation but may not always be true.                           */
456   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
457                   (ScatterMode)(SCATTERALL|SCATTERREVERSE),a->Mvctx); CHKERRQ(ierr);
458   return 0;
459 }
460 
461 /*
462   This only works correctly for square matrices where the subblock a->A is
463    the diagonal block.
464 */
465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
466 {
467   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
468   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
469   return MatGetDiagonal(a->A,v);
470 }
471 
472 static int MatDestroy_MPIAIJ(PetscObject obj)
473 {
474   Mat        mat = (Mat) obj;
475   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
476   int        ierr;
477 #if defined(PETSC_LOG)
478   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
479 #endif
480   PETSCFREE(aij->rowners);
481   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
482   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
483   if (aij->colmap) PETSCFREE(aij->colmap);
484   if (aij->garray) PETSCFREE(aij->garray);
485   if (aij->lvec)   VecDestroy(aij->lvec);
486   if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
487   PETSCFREE(aij);
488   PLogObjectDestroy(mat);
489   PETSCHEADERDESTROY(mat);
490   return 0;
491 }
492 #include "draw.h"
493 #include "pinclude/pviewer.h"
494 
495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
496 {
497   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
498   int         ierr;
499 
500   if (aij->numtids == 1) {
501     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
502   }
503   return 0;
504 }
505 
506 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
507 {
508   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
509   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
510   int         ierr, format,shift = C->indexshift;
511   PetscObject vobj = (PetscObject) viewer;
512   FILE        *fd;
513 
514   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
515     ierr = ViewerFileGetFormat_Private(viewer,&format);
516     if (format == FILE_FORMAT_INFO) {
517       return 0;
518     }
519   }
520 
521   if (vobj->type == ASCII_FILE_VIEWER) {
522     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
523     MPIU_Seq_begin(mat->comm,1);
524     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
525            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
526            aij->cend);
527     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
528     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
529     fflush(fd);
530     MPIU_Seq_end(mat->comm,1);
531   }
532   else {
533     int numtids = aij->numtids, mytid = aij->mytid;
534     if (numtids == 1) {
535       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
536     }
537     else {
538       /* assemble the entire matrix onto first processor. */
539       Mat         A;
540       Mat_SeqAIJ *Aloc;
541       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
542       Scalar      *a;
543 
544       if (!mytid) {
545         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
546       }
547       else {
548         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
549       }
550       PLogObjectParent(mat,A);
551 
552 
553       /* copy over the A part */
554       Aloc = (Mat_SeqAIJ*) aij->A->data;
555       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
556       row = aij->rstart;
557       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
558       for ( i=0; i<m; i++ ) {
559         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
560         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
561       }
562       aj = Aloc->j;
563       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
564 
565       /* copy over the B part */
566       Aloc = (Mat_SeqAIJ*) aij->B->data;
567       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
568       row = aij->rstart;
569       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
570       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
571       for ( i=0; i<m; i++ ) {
572         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
573         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
574       }
575       PETSCFREE(ct);
576       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
577       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
578       if (!mytid) {
579         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
580       }
581       ierr = MatDestroy(A); CHKERRQ(ierr);
582     }
583   }
584   return 0;
585 }
586 
587 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
588 {
589   Mat         mat = (Mat) obj;
590   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
591   int         ierr;
592   PetscObject vobj = (PetscObject) viewer;
593 
594   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
595   if (!viewer) {
596     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
597   }
598   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
599   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
600     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
601   }
602   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
603     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
604   }
605   else if (vobj->cookie == DRAW_COOKIE) {
606     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
607   }
608   else if (vobj->type == BINARY_FILE_VIEWER) {
609     return MatView_MPIAIJ_Binary(mat,viewer);
610   }
611   return 0;
612 }
613 
614 extern int MatMarkDiag_SeqAIJ(Mat);
615 /*
616     This has to provide several versions:
617 
618      1) as in the sequential case
619      2) a) use only local smoothing, updating outer values only once
620         b) local smoothing, updating outer values each inner iteration
621      3) coloring, updating outer values between colors
622 */
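/*
   Summary of the cases actually handled below:
     - SOR_EISENSTAT: Eisenstat's trick for SSOR preconditioning;
     - SOR_SYMMETRIC_SWEEP, SOR_FORWARD_SWEEP, SOR_BACKWARD_SWEEP: true
       parallel sweeps, with ghost values exchanged through the
       VecScatter/VecPipeline calls between (or during) the sweeps;
     - the SOR_LOCAL_* variants: ghost values are gathered once, after which
       each processor smooths only its own rows (block Jacobi with SOR
       applied within each block).
*/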
623 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
624                            double fshift,int its,Vec xx)
625 {
626   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
627   Mat        AA = mat->A, BB = mat->B;
628   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
629   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
630   int        ierr,*idx, *diag;
631   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
632   Vec        tt;
633 
634   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
635 
636   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
637   xs = x + shift; /* shift by one for index start of 1 */
638   ls = ls + shift;
639   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
640   diag = A->diag;
641   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
642     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
643   }
644   if (flag & SOR_EISENSTAT) {
645     /* Let  A = L + U + D, where L is strictly lower triangular,
646     U is strictly upper triangular, and D is diagonal. This routine applies
647 
648             (L + E)^{-1} A (U + E)^{-1}
649 
650     to a vector efficiently using Eisenstat's trick. This is for
651     the case of SSOR preconditioner, so E is D/omega where omega
652     is the relaxation factor.
653     */
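    /*
       The steps carried out below, with E = (D + fshift I)/omega:
           x = (E + U)^{-1} b          (the backward loop)
           t = b - (2E - D) x          (scale = 2/omega - 1, so 2E - D = scale*D when fshift is 0)
           t = (E + L)^{-1} t          (the forward loop)
           x = x + t
       Couplings to rows owned by other processors enter through the ghost
       values in mat->lvec, filled by the VecPipelineBegin/End calls.
    */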
654     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
655     PLogObjectParent(matin,tt);
656     VecGetArray(tt,&t);
657     scale = (2.0/omega) - 1.0;
658     /*  x = (E + U)^{-1} b */
659     VecSet(&zero,mat->lvec);
660     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
661                               mat->Mvctx); CHKERRQ(ierr);
662     for ( i=m-1; i>-1; i-- ) {
663       n    = A->i[i+1] - diag[i] - 1;
664       idx  = A->j + diag[i] + !shift;
665       v    = A->a + diag[i] + !shift;
666       sum  = b[i];
667       SPARSEDENSEMDOT(sum,xs,v,idx,n);
668       d    = fshift + A->a[diag[i]+shift];
669       n    = B->i[i+1] - B->i[i];
670       idx  = B->j + B->i[i] + shift;
671       v    = B->a + B->i[i] + shift;
672       SPARSEDENSEMDOT(sum,ls,v,idx,n);
673       x[i] = omega*(sum/d);
674     }
675     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
676                             mat->Mvctx); CHKERRQ(ierr);
677 
678     /*  t = b - (2*E - D)x */
679     v = A->a;
680     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
681 
682     /*  t = (E + L)^{-1}t */
683     ts = t + shift; /* shifted by one for index start of a or mat->j*/
684     diag = A->diag;
685     VecSet(&zero,mat->lvec);
686     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
687                                                  mat->Mvctx); CHKERRQ(ierr);
688     for ( i=0; i<m; i++ ) {
689       n    = diag[i] - A->i[i];
690       idx  = A->j + A->i[i] + shift;
691       v    = A->a + A->i[i] + shift;
692       sum  = t[i];
693       SPARSEDENSEMDOT(sum,ts,v,idx,n);
694       d    = fshift + A->a[diag[i]+shift];
695       n    = B->i[i+1] - B->i[i];
696       idx  = B->j + B->i[i] + shift;
697       v    = B->a + B->i[i] + shift;
698       SPARSEDENSEMDOT(sum,ls,v,idx,n);
699       t[i] = omega*(sum/d);
700     }
701     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
702                                                     mat->Mvctx); CHKERRQ(ierr);
703     /*  x = x + t */
704     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
705     VecDestroy(tt);
706     return 0;
707   }
708 
709 
710   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
711     if (flag & SOR_ZERO_INITIAL_GUESS) {
712       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
713     }
714     else {
715       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
716       CHKERRQ(ierr);
717       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
718       CHKERRQ(ierr);
719     }
720     while (its--) {
721       /* go down through the rows */
722       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
723                               mat->Mvctx); CHKERRQ(ierr);
724       for ( i=0; i<m; i++ ) {
725         n    = A->i[i+1] - A->i[i];
726         idx  = A->j + A->i[i] + shift;
727         v    = A->a + A->i[i] + shift;
728         sum  = b[i];
729         SPARSEDENSEMDOT(sum,xs,v,idx,n);
730         d    = fshift + A->a[diag[i]+shift];
731         n    = B->i[i+1] - B->i[i];
732         idx  = B->j + B->i[i] + shift;
733         v    = B->a + B->i[i] + shift;
734         SPARSEDENSEMDOT(sum,ls,v,idx,n);
735         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
736       }
737       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
738                             mat->Mvctx); CHKERRQ(ierr);
739       /* come up through the rows */
740       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
741                               mat->Mvctx); CHKERRQ(ierr);
742       for ( i=m-1; i>-1; i-- ) {
743         n    = A->i[i+1] - A->i[i];
744         idx  = A->j + A->i[i] + shift;
745         v    = A->a + A->i[i] + shift;
746         sum  = b[i];
747         SPARSEDENSEMDOT(sum,xs,v,idx,n);
748         d    = fshift + A->a[diag[i]+shift];
749         n    = B->i[i+1] - B->i[i];
750         idx  = B->j + B->i[i] + shift;
751         v    = B->a + B->i[i] + shift;
752         SPARSEDENSEMDOT(sum,ls,v,idx,n);
753         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
754       }
755       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
756                             mat->Mvctx); CHKERRQ(ierr);
757     }
758   }
759   else if (flag & SOR_FORWARD_SWEEP){
760     if (flag & SOR_ZERO_INITIAL_GUESS) {
761       VecSet(&zero,mat->lvec);
762       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
763                               mat->Mvctx); CHKERRQ(ierr);
764       for ( i=0; i<m; i++ ) {
765         n    = diag[i] - A->i[i];
766         idx  = A->j + A->i[i] + shift;
767         v    = A->a + A->i[i] + shift;
768         sum  = b[i];
769         SPARSEDENSEMDOT(sum,xs,v,idx,n);
770         d    = fshift + A->a[diag[i]+shift];
771         n    = B->i[i+1] - B->i[i];
772         idx  = B->j + B->i[i] + shift;
773         v    = B->a + B->i[i] + shift;
774         SPARSEDENSEMDOT(sum,ls,v,idx,n);
775         x[i] = omega*(sum/d);
776       }
777       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
778                             mat->Mvctx); CHKERRQ(ierr);
779       its--;
780     }
781     while (its--) {
782       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
783       CHKERRQ(ierr);
784       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
785       CHKERRQ(ierr);
786       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
787                               mat->Mvctx); CHKERRQ(ierr);
788       for ( i=0; i<m; i++ ) {
789         n    = A->i[i+1] - A->i[i];
790         idx  = A->j + A->i[i] + shift;
791         v    = A->a + A->i[i] + shift;
792         sum  = b[i];
793         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794         d    = fshift + A->a[diag[i]+shift];
795         n    = B->i[i+1] - B->i[i];
796         idx  = B->j + B->i[i] + shift;
797         v    = B->a + B->i[i] + shift;
798         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
800       }
801       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
802                             mat->Mvctx); CHKERRQ(ierr);
803     }
804   }
805   else if (flag & SOR_BACKWARD_SWEEP){
806     if (flag & SOR_ZERO_INITIAL_GUESS) {
807       VecSet(&zero,mat->lvec);
808       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
809                               mat->Mvctx); CHKERRQ(ierr);
810       for ( i=m-1; i>-1; i-- ) {
811         n    = A->i[i+1] - diag[i] - 1;
812         idx  = A->j + diag[i] + !shift;
813         v    = A->a + diag[i] + !shift;
814         sum  = b[i];
815         SPARSEDENSEMDOT(sum,xs,v,idx,n);
816         d    = fshift + A->a[diag[i]+shift];
817         n    = B->i[i+1] - B->i[i];
818         idx  = B->j + B->i[i] + shift;
819         v    = B->a + B->i[i] + shift;
820         SPARSEDENSEMDOT(sum,ls,v,idx,n);
821         x[i] = omega*(sum/d);
822       }
823       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
824                             mat->Mvctx); CHKERRQ(ierr);
825       its--;
826     }
827     while (its--) {
828       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN,
829                             mat->Mvctx); CHKERRQ(ierr);
830       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN,
831                             mat->Mvctx); CHKERRQ(ierr);
832       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
833                               mat->Mvctx); CHKERRQ(ierr);
834       for ( i=m-1; i>-1; i-- ) {
835         n    = A->i[i+1] - A->i[i];
836         idx  = A->j + A->i[i] + shift;
837         v    = A->a + A->i[i] + shift;
838         sum  = b[i];
839         SPARSEDENSEMDOT(sum,xs,v,idx,n);
840         d    = fshift + A->a[diag[i]+shift];
841         n    = B->i[i+1] - B->i[i];
842         idx  = B->j + B->i[i] + shift;
843         v    = B->a + B->i[i] + shift;
844         SPARSEDENSEMDOT(sum,ls,v,idx,n);
845         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
846       }
847       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
848                             mat->Mvctx); CHKERRQ(ierr);
849     }
850   }
851   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
852     if (flag & SOR_ZERO_INITIAL_GUESS) {
853       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
854     }
855     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
856     CHKERRQ(ierr);
857     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
858     CHKERRQ(ierr);
859     while (its--) {
860       /* go down through the rows */
861       for ( i=0; i<m; i++ ) {
862         n    = A->i[i+1] - A->i[i];
863         idx  = A->j + A->i[i] + shift;
864         v    = A->a + A->i[i] + shift;
865         sum  = b[i];
866         SPARSEDENSEMDOT(sum,xs,v,idx,n);
867         d    = fshift + A->a[diag[i]+shift];
868         n    = B->i[i+1] - B->i[i];
869         idx  = B->j + B->i[i] + shift;
870         v    = B->a + B->i[i] + shift;
871         SPARSEDENSEMDOT(sum,ls,v,idx,n);
872         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
873       }
874       /* come up through the rows */
875       for ( i=m-1; i>-1; i-- ) {
876         n    = A->i[i+1] - A->i[i];
877         idx  = A->j + A->i[i] + shift;
878         v    = A->a + A->i[i] + shift;
879         sum  = b[i];
880         SPARSEDENSEMDOT(sum,xs,v,idx,n);
881         d    = fshift + A->a[diag[i]+shift];
882         n    = B->i[i+1] - B->i[i];
883         idx  = B->j + B->i[i] + shift;
884         v    = B->a + B->i[i] + shift;
885         SPARSEDENSEMDOT(sum,ls,v,idx,n);
886         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
887       }
888     }
889   }
890   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
893     }
894     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
895     CHKERRQ(ierr);
896     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
897     CHKERRQ(ierr);
898     while (its--) {
899       for ( i=0; i<m; i++ ) {
900         n    = A->i[i+1] - A->i[i];
901         idx  = A->j + A->i[i] + shift;
902         v    = A->a + A->i[i] + shift;
903         sum  = b[i];
904         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905         d    = fshift + A->a[diag[i]+shift];
906         n    = B->i[i+1] - B->i[i];
907         idx  = B->j + B->i[i] + shift;
908         v    = B->a + B->i[i] + shift;
909         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
911       }
912     }
913   }
914   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
915     if (flag & SOR_ZERO_INITIAL_GUESS) {
916       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
917     }
918     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,
919                             mat->Mvctx); CHKERRQ(ierr);
920     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,
921                             mat->Mvctx); CHKERRQ(ierr);
922     while (its--) {
923       for ( i=m-1; i>-1; i-- ) {
924         n    = A->i[i+1] - A->i[i];
925         idx  = A->j + A->i[i] + shift;
926         v    = A->a + A->i[i] + shift;
927         sum  = b[i];
928         SPARSEDENSEMDOT(sum,xs,v,idx,n);
929         d    = fshift + A->a[diag[i]+shift];
930         n    = B->i[i+1] - B->i[i];
931         idx  = B->j + B->i[i] + shift;
932         v    = B->a + B->i[i] + shift;
933         SPARSEDENSEMDOT(sum,ls,v,idx,n);
934         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
935       }
936     }
937   }
938   return 0;
939 }
940 
941 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
942                              int *nzalloc,int *mem)
943 {
944   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
945   Mat        A = mat->A, B = mat->B;
946   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
947 
948   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
949   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
950   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
951   if (flag == MAT_LOCAL) {
952     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
953   } else if (flag == MAT_GLOBAL_MAX) {
954     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
955     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
956   } else if (flag == MAT_GLOBAL_SUM) {
957     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
958     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
959   }
960   return 0;
961 }
962 
963 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
964 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
965 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
966 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
967 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
968 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
969 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
970 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
971 
972 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
973 {
974   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) A->data;
975 
976   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
977     MatSetOption(aij->A,op);
978     MatSetOption(aij->B,op);
979   }
980   else if (op == YES_NEW_NONZERO_LOCATIONS) {
981     MatSetOption(aij->A,op);
982     MatSetOption(aij->B,op);
983   }
984   else if (op == COLUMNS_SORTED) {
985     MatSetOption(aij->A,op);
986     MatSetOption(aij->B,op);
987   }
988   else if (op == COLUMN_ORIENTED)
989     SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
990   return 0;
991 }
992 
993 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
994 {
995   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
996   *m = mat->M; *n = mat->N;
997   return 0;
998 }
999 
1000 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1001 {
1002   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1003   *m = mat->m; *n = mat->n;
1004   return 0;
1005 }
1006 
1007 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1008 {
1009   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1010   *m = mat->rstart; *n = mat->rend;
1011   return 0;
1012 }
1013 
1014 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1015 {
1016   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1017   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1018   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1019   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1020 
1021   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
1022   lrow = row - rstart;
1023 
1024   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1025   if (!v)   {pvA = 0; pvB = 0;}
1026   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1027   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1028   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1029   nztot = nzA + nzB;
1030 
1031   if (v  || idx) {
1032     if (nztot) {
1033       /* Sort by increasing column numbers, assuming A and B already sorted */
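        /* The merged row is built as: the B-part columns whose global numbers
           lie below cstart, then all A-part columns (shifted by cstart into
           global numbering), then the remaining B-part columns; imark marks
           where the B-part is split. */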
1034       int imark;
1035       if (mat->assembled) {
1036         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1037       }
1038       if (v) {
1039         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1040         for ( i=0; i<nzB; i++ ) {
1041           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1042           else break;
1043         }
1044         imark = i;
1045         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1046         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1047       }
1048       if (idx) {
1049         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1050         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1051         for ( i=0; i<nzB; i++ ) {
1052           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1053           else break;
1054         }
1055         imark = i;
1056         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1057         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1058       }
1059     }
1060     else {*idx = 0; *v=0;}
1061   }
1062   *nz = nztot;
1063   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1064   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1065   return 0;
1066 }
1067 
1068 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1069 {
1070   if (idx) PETSCFREE(*idx);
1071   if (v) PETSCFREE(*v);
1072   return 0;
1073 }
1074 
1075 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1076 {
1077   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1078   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1079   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1080   double     sum = 0.0;
1081   Scalar     *v;
1082 
1083   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1084   if (aij->numtids == 1) {
1085     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1086   } else {
1087     if (type == NORM_FROBENIUS) {
1088       v = amat->a;
1089       for (i=0; i<amat->nz; i++ ) {
1090 #if defined(PETSC_COMPLEX)
1091         sum += real(conj(*v)*(*v)); v++;
1092 #else
1093         sum += (*v)*(*v); v++;
1094 #endif
1095       }
1096       v = bmat->a;
1097       for (i=0; i<bmat->nz; i++ ) {
1098 #if defined(PETSC_COMPLEX)
1099         sum += real(conj(*v)*(*v)); v++;
1100 #else
1101         sum += (*v)*(*v); v++;
1102 #endif
1103       }
1104       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1105       *norm = sqrt(*norm);
1106     }
1107     else if (type == NORM_1) { /* max column norm */
1108       double *tmp, *tmp2;
1109       int    *jj, *garray = aij->garray;
1110       tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1111       tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1112       PetscZero(tmp,aij->N*sizeof(double));
1113       *norm = 0.0;
1114       v = amat->a; jj = amat->j;
1115       for ( j=0; j<amat->nz; j++ ) {
1116 #if defined(PETSC_COMPLEX)
1117         tmp[cstart + *jj++ + shift] += abs(*v++);
1118 #else
1119         tmp[cstart + *jj++ + shift] += fabs(*v++);
1120 #endif
1121       }
1122       v = bmat->a; jj = bmat->j;
1123       for ( j=0; j<bmat->nz; j++ ) {
1124 #if defined(PETSC_COMPLEX)
1125         tmp[garray[*jj++ + shift]] += abs(*v++);
1126 #else
1127         tmp[garray[*jj++ + shift]] += fabs(*v++);
1128 #endif
1129       }
1130       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1131       for ( j=0; j<aij->N; j++ ) {
1132         if (tmp2[j] > *norm) *norm = tmp2[j];
1133       }
1134       PETSCFREE(tmp); PETSCFREE(tmp2);
1135     }
1136     else if (type == NORM_INFINITY) { /* max row norm */
1137       double normtemp = 0.0;
1138       for ( j=0; j<amat->m; j++ ) {
1139         v = amat->a + amat->i[j] + shift;
1140         sum = 0.0;
1141         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1142 #if defined(PETSC_COMPLEX)
1143           sum += abs(*v); v++;
1144 #else
1145           sum += fabs(*v); v++;
1146 #endif
1147         }
1148         v = bmat->a + bmat->i[j] + shift;
1149         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1150 #if defined(PETSC_COMPLEX)
1151           sum += abs(*v); v++;
1152 #else
1153           sum += fabs(*v); v++;
1154 #endif
1155         }
1156         if (sum > normtemp) normtemp = sum;
1157       }
1158       MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1159     }
1160     else {
1161       SETERRQ(1,"MatNorm_MPIAIJ:No support for the two norm");
1162     }
1163   }
1164   return 0;
1165 }
1166 
1167 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1168 {
1169   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1170   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1171   int        ierr,shift = Aloc->indexshift;
1172   Mat        B;
1173   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1174   Scalar     *array;
1175 
1176   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1177   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1178   CHKERRQ(ierr);
1179 
1180   /* copy over the A part */
1181   Aloc = (Mat_SeqAIJ*) a->A->data;
1182   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1183   row = a->rstart;
1184   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1185   for ( i=0; i<m; i++ ) {
1186     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1187     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1188   }
1189   aj = Aloc->j;
1190   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1191 
1192   /* copy over the B part */
1193   Aloc = (Mat_SeqAIJ*) a->B->data;
1194   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1195   row = a->rstart;
1196   ct = cols = (int *) PETSCMALLOC( (ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1197   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1198   for ( i=0; i<m; i++ ) {
1199     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1200     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1201   }
1202   PETSCFREE(ct);
1203   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1204   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1205   if (matout) {
1206     *matout = B;
1207   } else {
1208     /* This isn't really an in-place transpose, but we free a's data structures and overwrite A's header with B's */
1209     PETSCFREE(a->rowners);
1210     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1211     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1212     if (a->colmap) PETSCFREE(a->colmap);
1213     if (a->garray) PETSCFREE(a->garray);
1214     if (a->lvec) VecDestroy(a->lvec);
1215     if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
1216     PETSCFREE(a);
1217     PetscMemcpy(A,B,sizeof(struct _Mat));
1218     PETSCHEADERDESTROY(B);
1219   }
1220   return 0;
1221 }
1222 
1223 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1224 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1225 
1226 /* -------------------------------------------------------------------*/
1227 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1228        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1229        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1230        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1231        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1232        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1233        MatLUFactor_MPIAIJ,0,
1234        MatRelax_MPIAIJ,
1235        MatTranspose_MPIAIJ,
1236        MatGetInfo_MPIAIJ,0,
1237        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1238        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1239        0,
1240        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1241        MatGetReordering_MPIAIJ,
1242        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1243        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1244        MatILUFactorSymbolic_MPIAIJ,0,
1245        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1246 
1247 /*@
1248    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1249    (the default parallel PETSc format).
1250 
1251    Input Parameters:
1252 .  comm - MPI communicator
1253 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1254 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1255            if N is given)
1256 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1257 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1258            if n is given)
1259 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1260            (same for all local rows)
1261 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1262            or null (possibly different for each row).  You must leave room
1263            for the diagonal entry even if it is zero.
1264 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1265            submatrix (same for all local rows).
1266 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1267            submatrix or null (possibly different for each row).
1268 
1269    Output Parameter:
1270 .  newmat - the matrix
1271 
1272    Notes:
1273    The AIJ format (also called the Yale sparse matrix format or
1274    compressed row storage) is fully compatible with standard Fortran 77
1275    storage.  That is, the stored row and column indices begin at
1276    one, not zero.  See the users manual for further details.
1277 
1278    The user MUST specify either the local or global matrix dimensions
1279    (possibly both).
1280 
1281    Storage Information:
1282    For a square global matrix we define each processor's diagonal portion
1283    to be its local rows and the corresponding columns (a square submatrix);
1284    each processor's off-diagonal portion encompasses the remainder of the
1285    local matrix (a rectangular submatrix).
1286 
1287    The user can specify preallocated storage for the diagonal part of
1288    the local submatrix with either d_nz or d_nnz (not both). Set both
1289    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1290    Likewise, specify preallocated storage for the off-diagonal part of
1291    the local submatrix with o_nz or o_nnz (not both).
1292 
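   Example:
   An illustrative call only (the preallocation figures 5 and 2 below are
   placeholders, not recommendations); it creates a 100 by 100 matrix with
   about 5 nonzeros per row expected in the diagonal block and 2 per row in
   the off-diagonal block on each processor:

      Mat A;
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,0,2,0,&A); CHKERRQ(ierr);
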
1293 .keywords: matrix, aij, compressed row, sparse, parallel
1294 
1295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1296 @*/
1297 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1298                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1299 {
1300   Mat          mat;
1301   Mat_MPIAIJ   *a;
1302   int          ierr, i,sum[2],work[2];
1303 
1304   *newmat         = 0;
1305   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1306   PLogObjectCreate(mat);
1307   mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1308   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1309   mat->destroy    = MatDestroy_MPIAIJ;
1310   mat->view       = MatView_MPIAIJ;
1311   mat->factor     = 0;
1312 
1313   a->insertmode = NOTSETVALUES;
1314   MPI_Comm_rank(comm,&a->mytid);
1315   MPI_Comm_size(comm,&a->numtids);
1316 
1317   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1318     work[0] = m; work[1] = n;
1319     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1320     if (M == PETSC_DECIDE) M = sum[0];
1321     if (N == PETSC_DECIDE) N = sum[1];
1322   }
1323   if (m == PETSC_DECIDE) {m = M/a->numtids + ((M % a->numtids) > a->mytid);}
1324   if (n == PETSC_DECIDE) {n = N/a->numtids + ((N % a->numtids) > a->mytid);}
1325   a->m = m;
1326   a->n = n;
1327   a->N = N;
1328   a->M = M;
1329 
1330   /* build local table of row and column ownerships */
1331   a->rowners = (int *) PETSCMALLOC(2*(a->numtids+2)*sizeof(int)); CHKPTRQ(a->rowners);
1332   PLogObjectMemory(mat,2*(a->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
1333                        sizeof(Mat_MPIAIJ));
1334   a->cowners = a->rowners + a->numtids + 1;
1335   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1336   a->rowners[0] = 0;
1337   for ( i=2; i<=a->numtids; i++ ) {
1338     a->rowners[i] += a->rowners[i-1];
1339   }
1340   a->rstart = a->rowners[a->mytid];
1341   a->rend   = a->rowners[a->mytid+1];
1342   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1343   a->cowners[0] = 0;
1344   for ( i=2; i<=a->numtids; i++ ) {
1345     a->cowners[i] += a->cowners[i-1];
1346   }
1347   a->cstart = a->cowners[a->mytid];
1348   a->cend   = a->cowners[a->mytid+1];
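  /* For example (illustration only): with three processors owning 4, 3 and 5
     rows, rowners = {0,4,7,12}; processor 1 then gets rstart = 4 and
     rend = 7, i.e. it owns global rows 4, 5 and 6.  The column ownership
     array cowners is built the same way from the local column counts. */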
1349 
1350   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1351   PLogObjectParent(mat,a->A);
1352   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1353   PLogObjectParent(mat,a->B);
1354 
1355   /* build cache (stash) for entries destined for rows owned by other processors */
1356   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1357   a->colmap    = 0;
1358   a->garray    = 0;
1359 
1360   /* stuff used for matrix vector multiply */
1361   a->lvec      = 0;
1362   a->Mvctx     = 0;
1363   a->assembled = 0;
1364 
1365   *newmat = mat;
1366   return 0;
1367 }
1368 
1369 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1370 {
1371   Mat        mat;
1372   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1373   int        ierr, len;
1374 
1375   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1376   *newmat      = 0;
1377   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1378   PLogObjectCreate(mat);
1379   mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1380   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1381   mat->destroy    = MatDestroy_MPIAIJ;
1382   mat->view       = MatView_MPIAIJ;
1383   mat->factor     = matin->factor;
1384 
1385   a->m          = oldmat->m;
1386   a->n          = oldmat->n;
1387   a->M          = oldmat->M;
1388   a->N          = oldmat->N;
1389 
1390   a->assembled  = 1;
1391   a->rstart     = oldmat->rstart;
1392   a->rend       = oldmat->rend;
1393   a->cstart     = oldmat->cstart;
1394   a->cend       = oldmat->cend;
1395   a->numtids    = oldmat->numtids;
1396   a->mytid      = oldmat->mytid;
1397   a->insertmode = NOTSETVALUES;
1398 
1399   a->rowners    = (int *) PETSCMALLOC((a->numtids+1)*sizeof(int));CHKPTRQ(a->rowners);
1400   PLogObjectMemory(mat,(a->numtids+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1401   PetscMemcpy(a->rowners,oldmat->rowners,(a->numtids+1)*sizeof(int));
1402   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1403   if (oldmat->colmap) {
1404     a->colmap      = (int *) PETSCMALLOC( (a->N)*sizeof(int) );CHKPTRQ(a->colmap);
1405     PLogObjectMemory(mat,(a->N)*sizeof(int));
1406     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1407   } else a->colmap = 0;
1408   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1409     a->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(a->garray);
1410     PLogObjectMemory(mat,len*sizeof(int));
1411     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1412   } else a->garray = 0;
1413 
1414   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1415   PLogObjectParent(mat,a->lvec);
1416   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1417   PLogObjectParent(mat,a->Mvctx);
1418   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1419   PLogObjectParent(mat,a->A);
1420   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1421   PLogObjectParent(mat,a->B);
1422   *newmat = mat;
1423   return 0;
1424 }
1425 
1426 #include "sysio.h"
1427 
1428 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1429 {
1430   Mat          A;
1431   int          i, nz, ierr, j,rstart, rend, fd;
1432   Scalar       *vals,*svals;
1433   PetscObject  vobj = (PetscObject) bview;
1434   MPI_Comm     comm = vobj->comm;
1435   MPI_Status   status;
1436   int          header[4],mytid,numtid,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1437   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1438   int          tag = ((PetscObject)bview)->tag;
1439 
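  /* Layout of the binary file, as assumed by the reads below (the fourth
     header entry is read but not used here):
         int    header[4] = {MAT_COOKIE, M, N, ...}
         int    rowlengths[M]        number of nonzeros in each row
         int    column indices       for all rows, concatenated
         Scalar numerical values     for all rows, concatenated              */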
1440   MPI_Comm_size(comm,&numtid); MPI_Comm_rank(comm,&mytid);
1441   if (!mytid) {
1442     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1443     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1444     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1445   }
1446 
1447   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1448   M = header[1]; N = header[2];
1449   /* determine ownership of all rows */
1450   m = M/numtid + ((M % numtid) > mytid);
1451   rowners = (int *) PETSCMALLOC((numtid+2)*sizeof(int)); CHKPTRQ(rowners);
1452   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1453   rowners[0] = 0;
1454   for ( i=2; i<=numtid; i++ ) {
1455     rowners[i] += rowners[i-1];
1456   }
1457   rstart = rowners[mytid];
1458   rend   = rowners[mytid+1];
1459 
1460   /* distribute row lengths to all processors */
1461   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1462   offlens = ourlens + (rend-rstart);
1463   if (!mytid) {
1464     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
1465     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1466     sndcounts = (int*) PETSCMALLOC( numtid*sizeof(int) ); CHKPTRQ(sndcounts);
1467     for ( i=0; i<numtid; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1468     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1469     PETSCFREE(sndcounts);
1470   }
1471   else {
1472     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1473   }
1474 
1475   if (!mytid) {
1476     /* calculate the number of nonzeros on each processor */
1477     procsnz = (int*) PETSCMALLOC( numtid*sizeof(int) ); CHKPTRQ(procsnz);
1478     PetscZero(procsnz,numtid*sizeof(int));
1479     for ( i=0; i<numtid; i++ ) {
1480       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1481         procsnz[i] += rowlengths[j];
1482       }
1483     }
1484     PETSCFREE(rowlengths);
1485 
1486     /* determine max buffer needed and allocate it */
1487     maxnz = 0;
1488     for ( i=0; i<numtid; i++ ) {
1489       maxnz = PETSCMAX(maxnz,procsnz[i]);
1490     }
1491     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
1492 
1493     /* read in my part of the matrix column indices  */
1494     nz = procsnz[0];
1495     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1496     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1497 
1498     /* read in everyone else's part and ship it off */
1499     for ( i=1; i<numtid; i++ ) {
1500       nz = procsnz[i];
1501       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1502       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1503     }
1504     PETSCFREE(cols);
1505   }
1506   else {
1507     /* determine buffer space needed for message */
1508     nz = 0;
1509     for ( i=0; i<m; i++ ) {
1510       nz += ourlens[i];
1511     }
1512     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1513 
1514     /* receive message of column indices*/
1515     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1516     MPI_Get_count(&status,MPI_INT,&maxnz);
1517     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1518   }
1519 
1520   /* loop over local rows, determining number of off diagonal entries */
1521   PetscZero(offlens,m*sizeof(int));
1522   jj = 0;
1523   for ( i=0; i<m; i++ ) {
1524     for ( j=0; j<ourlens[i]; j++ ) {
1525       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1526       jj++;
1527     }
1528   }
1529 
1530 
1531   /* create our matrix */
1532   for ( i=0; i<m; i++ ) {
1533     ourlens[i] -= offlens[i];
1534   }
1535   if (type == MATMPIAIJ) {
1536     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1537   }
1538   else if (type == MATMPIROW) {
1539     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1540   }
1541   A = *newmat;
1542   MatSetOption(A,COLUMNS_SORTED);
1543   for ( i=0; i<m; i++ ) {
1544     ourlens[i] += offlens[i];
1545   }
1546 
1547   if (!mytid) {
1548     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1549 
1550     /* read in my part of the matrix numerical values  */
1551     nz = procsnz[0];
1552     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1553 
1554     /* insert into matrix */
1555     jj      = rstart;
1556     smycols = mycols;
1557     svals   = vals;
1558     for ( i=0; i<m; i++ ) {
1559       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1560       smycols += ourlens[i];
1561       svals   += ourlens[i];
1562       jj++;
1563     }
1564 
1565     /* read in the other processors' parts and ship them out */
1566     for ( i=1; i<numtid; i++ ) {
1567       nz = procsnz[i];
1568       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1569       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1570     }
1571     PETSCFREE(procsnz);
1572   }
1573   else {
1574     /* receive numeric values */
1575     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1576 
1577     /* receive message of values*/
1578     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1579     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1580     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1581 
1582     /* insert into matrix */
1583     jj      = rstart;
1584     smycols = mycols;
1585     svals   = vals;
1586     for ( i=0; i<m; i++ ) {
1587       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1588       smycols += ourlens[i];
1589       svals   += ourlens[i];
1590       jj++;
1591     }
1592   }
1593   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
1594 
1595   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1596   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1597   return 0;
1598 }
1599