xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 579c6b6f3d7500e660984684ec3dbc541fe5e741)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.97 1995/11/01 23:18:18 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block of
   values (v, stored by rows) at global indices (idxm, idxn).

   Locally owned rows are routed to the diagonal block (aij->A) when the
   column is also owned locally, otherwise to the off-diagonal block (aij->B).
   Rows owned by other processors are cached in the stash and communicated
   during assembly.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  /* all values set since the last assembly must use the same insert mode */
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* this row is owned locally */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column is local too: entry belongs to the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          /* off-processor column: entry belongs to the off-diagonal block */
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            /* col < 0 means this global column has no slot in the assembled
               B block; unless new nonzeros are disallowed, disassemble so
               the new entry can be introduced (using global numbering) */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: stash the whole row fragment for assembly time */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}
84 
/*
   MatAssemblyBegin_MPIAIJ - Starts communicating stashed (off-processor)
   matrix entries to their owning processors.

   Each message is a sequence of (row,col,value) triples; the integer indices
   are stored as Scalars so a single MPI datatype (MPIU_SCALAR) suffices.
   The nonblocking sends/receives posted here are completed in
   MatAssemblyEnd_MPIAIJ(), which also frees the buffers saved in aij.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* linear search of the row-ownership ranges for the owner of row idx */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples by destination; note starts[] is advanced while packing
     and therefore rebuilt below before posting the sends */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* save communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
182 extern int MatSetUpMultiply_MPIAIJ(Mat);
183 
/*
   MatAssemblyEnd_MPIAIJ - Completes the receives posted by
   MatAssemblyBegin_MPIAIJ, inserts the received (row,col,value) triples
   into the local blocks, then assembles the sequential blocks A and B.
*/
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;  /* each entry is a (row,col,value) triple */
    for ( i=0; i<n; i++ ) {
      /* indices arrive encoded as Scalars; convert back to ints */
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        /* off-diagonal entry; same colmap/disassemble logic as
           MatSetValues_MPIAIJ */
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  /* the scatter context for MatMult etc. is built on the first final assembly */
  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
254 
255 static int MatZeroEntries_MPIAIJ(Mat A)
256 {
257   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
258   int        ierr;
259   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
260   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
261   return 0;
262 }
263 
264 /* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
266    This is a BUG. The only way to fix it seems to be to access
267    aij->A and aij->B directly and not through the MatZeroRows()
268    routine.
269 */
/*
   MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows listed in is,
   optionally placing *diag on the diagonal of each zeroed row.

   Each processor sends the row numbers it holds in is to the owners of
   those rows, gathers the row numbers it owns from everyone, and then
   zaps its local rows via MatZeroRows on the two sequential blocks.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* linear search of the row-ownership ranges for the owner of row idx */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack row numbers by destination; starts[] is advanced while packing
     and rebuilt below before posting the sends */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives; record the length and source of each message */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter; convert global rows to local */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows; only the diagonal block gets *diag */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
393 
394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
395 {
396   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
397   int        ierr;
398 
399   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
400   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
402   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
403   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
404   return 0;
405 }
406 
407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
410   int        ierr;
411   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
412   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
416   return 0;
417 }
418 
419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
422   int        ierr;
423 
424   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
425   /* do nondiagonal part */
426   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
427   /* send it on its way */
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
429                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
430   /* do local part */
431   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
432   /* receive remote parts: note this assumes the values are not actually */
433   /* inserted in yy until the next line, which is true for my implementation*/
434   /* but is not perhaps always true. */
435   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
436                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
437   return 0;
438 }
439 
440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
443   int        ierr;
444 
445   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
450                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
451   /* do local part */
452   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* inserted in yy until the next line, which is true for my implementation*/
455   /* but is not perhaps always true. */
456   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
457                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
458   return 0;
459 }
460 
461 /*
462   This only works correctly for square matrices where the subblock A->A is the
463    diagonal block
464 */
465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
466 {
467   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
468   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
469   return MatGetDiagonal(a->A,v);
470 }
471 
472 static int MatDestroy_MPIAIJ(PetscObject obj)
473 {
474   Mat        mat = (Mat) obj;
475   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
476   int        ierr;
477 #if defined(PETSC_LOG)
478   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
479 #endif
480   PetscFree(aij->rowners);
481   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
482   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
483   if (aij->colmap) PetscFree(aij->colmap);
484   if (aij->garray) PetscFree(aij->garray);
485   if (aij->lvec)   VecDestroy(aij->lvec);
486   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
487   PetscFree(aij);
488   PLogObjectDestroy(mat);
489   PetscHeaderDestroy(mat);
490   return 0;
491 }
492 #include "draw.h"
493 #include "pinclude/pviewer.h"
494 
495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
496 {
497   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
498   int         ierr;
499 
500   if (aij->size == 1) {
501     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
502   }
503   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
504   return 0;
505 }
506 
/*
   MatView_MPIAIJ_ASCIIorDraw - Views an MPIAIJ matrix to an ASCII file or
   drawing context.

   The info formats print only summary statistics.  Full ASCII output has
   each processor print its own blocks in rank order; for other viewers the
   entire matrix is first assembled onto processor 0 and viewed there.
*/
static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      /* per-processor nonzero and storage statistics, in rank order */
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
                nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      /* the brief info format prints nothing for this matrix type */
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    /* each processor prints its ownership ranges followed by its blocks */
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);


      /* copy over the A part: the column indices are shifted in place to
         global numbering for MatSetValues and restored afterwards */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: garray maps compressed columns to global */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        /* processor 0 now owns the whole matrix in its diagonal block */
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
605 
606 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
607 {
608   Mat         mat = (Mat) obj;
609   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
610   int         ierr;
611   PetscObject vobj = (PetscObject) viewer;
612 
613   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
614   if (!viewer) {
615     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
616   }
617   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
618   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
619     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
620   }
621   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
622     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
623   }
624   else if (vobj->cookie == DRAW_COOKIE) {
625     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
626   }
627   else if (vobj->type == BINARY_FILE_VIEWER) {
628     return MatView_MPIAIJ_Binary(mat,viewer);
629   }
630   return 0;
631 }
632 
633 extern int MatMarkDiag_SeqAIJ(Mat);
634 /*
635     This has to provide several versions.
636 
637      1) per sequential
638      2) a) use only local smoothing updating outer values only once.
639         b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
641 */
642 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
643                            double fshift,int its,Vec xx)
644 {
645   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
646   Mat        AA = mat->A, BB = mat->B;
647   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
648   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
649   int        ierr,*idx, *diag;
650   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
651   Vec        tt;
652 
653   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
654 
655   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
656   xs = x + shift; /* shift by one for index start of 1 */
657   ls = ls + shift;
658   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
659   diag = A->diag;
660   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
661     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
662   }
663   if (flag & SOR_EISENSTAT) {
664     /* Let  A = L + U + D; where L is lower trianglar,
665     U is upper triangular, E is diagonal; This routine applies
666 
667             (L + E)^{-1} A (U + E)^{-1}
668 
669     to a vector efficiently using Eisenstat's trick. This is for
670     the case of SSOR preconditioner, so E is D/omega where omega
671     is the relaxation factor.
672     */
673     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
674     PLogObjectParent(matin,tt);
675     VecGetArray(tt,&t);
676     scale = (2.0/omega) - 1.0;
677     /*  x = (E + U)^{-1} b */
678     VecSet(&zero,mat->lvec);
679     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
680                               mat->Mvctx); CHKERRQ(ierr);
681     for ( i=m-1; i>-1; i-- ) {
682       n    = A->i[i+1] - diag[i] - 1;
683       idx  = A->j + diag[i] + !shift;
684       v    = A->a + diag[i] + !shift;
685       sum  = b[i];
686       SPARSEDENSEMDOT(sum,xs,v,idx,n);
687       d    = fshift + A->a[diag[i]+shift];
688       n    = B->i[i+1] - B->i[i];
689       idx  = B->j + B->i[i] + shift;
690       v    = B->a + B->i[i] + shift;
691       SPARSEDENSEMDOT(sum,ls,v,idx,n);
692       x[i] = omega*(sum/d);
693     }
694     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
695                             mat->Mvctx); CHKERRQ(ierr);
696 
697     /*  t = b - (2*E - D)x */
698     v = A->a;
699     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
700 
701     /*  t = (E + L)^{-1}t */
702     ts = t + shift; /* shifted by one for index start of a or mat->j*/
703     diag = A->diag;
704     VecSet(&zero,mat->lvec);
705     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
706                                                  mat->Mvctx); CHKERRQ(ierr);
707     for ( i=0; i<m; i++ ) {
708       n    = diag[i] - A->i[i];
709       idx  = A->j + A->i[i] + shift;
710       v    = A->a + A->i[i] + shift;
711       sum  = t[i];
712       SPARSEDENSEMDOT(sum,ts,v,idx,n);
713       d    = fshift + A->a[diag[i]+shift];
714       n    = B->i[i+1] - B->i[i];
715       idx  = B->j + B->i[i] + shift;
716       v    = B->a + B->i[i] + shift;
717       SPARSEDENSEMDOT(sum,ls,v,idx,n);
718       t[i] = omega*(sum/d);
719     }
720     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
721                                                     mat->Mvctx); CHKERRQ(ierr);
722     /*  x = x + t */
723     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
724     VecDestroy(tt);
725     return 0;
726   }
727 
728 
729   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
730     if (flag & SOR_ZERO_INITIAL_GUESS) {
731       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
732     }
733     else {
734       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
735       CHKERRQ(ierr);
736       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
737       CHKERRQ(ierr);
738     }
739     while (its--) {
740       /* go down through the rows */
741       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
742                               mat->Mvctx); CHKERRQ(ierr);
743       for ( i=0; i<m; i++ ) {
744         n    = A->i[i+1] - A->i[i];
745         idx  = A->j + A->i[i] + shift;
746         v    = A->a + A->i[i] + shift;
747         sum  = b[i];
748         SPARSEDENSEMDOT(sum,xs,v,idx,n);
749         d    = fshift + A->a[diag[i]+shift];
750         n    = B->i[i+1] - B->i[i];
751         idx  = B->j + B->i[i] + shift;
752         v    = B->a + B->i[i] + shift;
753         SPARSEDENSEMDOT(sum,ls,v,idx,n);
754         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
755       }
756       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
757                             mat->Mvctx); CHKERRQ(ierr);
758       /* come up through the rows */
759       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
760                               mat->Mvctx); CHKERRQ(ierr);
761       for ( i=m-1; i>-1; i-- ) {
762         n    = A->i[i+1] - A->i[i];
763         idx  = A->j + A->i[i] + shift;
764         v    = A->a + A->i[i] + shift;
765         sum  = b[i];
766         SPARSEDENSEMDOT(sum,xs,v,idx,n);
767         d    = fshift + A->a[diag[i]+shift];
768         n    = B->i[i+1] - B->i[i];
769         idx  = B->j + B->i[i] + shift;
770         v    = B->a + B->i[i] + shift;
771         SPARSEDENSEMDOT(sum,ls,v,idx,n);
772         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
773       }
774       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
775                             mat->Mvctx); CHKERRQ(ierr);
776     }
777   }
778   else if (flag & SOR_FORWARD_SWEEP){
779     if (flag & SOR_ZERO_INITIAL_GUESS) {
780       VecSet(&zero,mat->lvec);
781       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
782                               mat->Mvctx); CHKERRQ(ierr);
783       for ( i=0; i<m; i++ ) {
784         n    = diag[i] - A->i[i];
785         idx  = A->j + A->i[i] + shift;
786         v    = A->a + A->i[i] + shift;
787         sum  = b[i];
788         SPARSEDENSEMDOT(sum,xs,v,idx,n);
789         d    = fshift + A->a[diag[i]+shift];
790         n    = B->i[i+1] - B->i[i];
791         idx  = B->j + B->i[i] + shift;
792         v    = B->a + B->i[i] + shift;
793         SPARSEDENSEMDOT(sum,ls,v,idx,n);
794         x[i] = omega*(sum/d);
795       }
796       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
797                             mat->Mvctx); CHKERRQ(ierr);
798       its--;
799     }
800     while (its--) {
801       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
802       CHKERRQ(ierr);
803       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
804       CHKERRQ(ierr);
805       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
806                               mat->Mvctx); CHKERRQ(ierr);
807       for ( i=0; i<m; i++ ) {
808         n    = A->i[i+1] - A->i[i];
809         idx  = A->j + A->i[i] + shift;
810         v    = A->a + A->i[i] + shift;
811         sum  = b[i];
812         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813         d    = fshift + A->a[diag[i]+shift];
814         n    = B->i[i+1] - B->i[i];
815         idx  = B->j + B->i[i] + shift;
816         v    = B->a + B->i[i] + shift;
817         SPARSEDENSEMDOT(sum,ls,v,idx,n);
818         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
819       }
820       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
821                             mat->Mvctx); CHKERRQ(ierr);
822     }
823   }
824   else if (flag & SOR_BACKWARD_SWEEP){
825     if (flag & SOR_ZERO_INITIAL_GUESS) {
826       VecSet(&zero,mat->lvec);
827       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
828                               mat->Mvctx); CHKERRQ(ierr);
829       for ( i=m-1; i>-1; i-- ) {
830         n    = A->i[i+1] - diag[i] - 1;
831         idx  = A->j + diag[i] + !shift;
832         v    = A->a + diag[i] + !shift;
833         sum  = b[i];
834         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835         d    = fshift + A->a[diag[i]+shift];
836         n    = B->i[i+1] - B->i[i];
837         idx  = B->j + B->i[i] + shift;
838         v    = B->a + B->i[i] + shift;
839         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840         x[i] = omega*(sum/d);
841       }
842       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
843                             mat->Mvctx); CHKERRQ(ierr);
844       its--;
845     }
846     while (its--) {
847       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
848                             mat->Mvctx); CHKERRQ(ierr);
849       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
850                             mat->Mvctx); CHKERRQ(ierr);
851       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
852                               mat->Mvctx); CHKERRQ(ierr);
853       for ( i=m-1; i>-1; i-- ) {
854         n    = A->i[i+1] - A->i[i];
855         idx  = A->j + A->i[i] + shift;
856         v    = A->a + A->i[i] + shift;
857         sum  = b[i];
858         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859         d    = fshift + A->a[diag[i]+shift];
860         n    = B->i[i+1] - B->i[i];
861         idx  = B->j + B->i[i] + shift;
862         v    = B->a + B->i[i] + shift;
863         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
865       }
866       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
867                             mat->Mvctx); CHKERRQ(ierr);
868     }
869   }
870   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
873     }
874     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
875     CHKERRQ(ierr);
876     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
877     CHKERRQ(ierr);
878     while (its--) {
879       /* go down through the rows */
880       for ( i=0; i<m; i++ ) {
881         n    = A->i[i+1] - A->i[i];
882         idx  = A->j + A->i[i] + shift;
883         v    = A->a + A->i[i] + shift;
884         sum  = b[i];
885         SPARSEDENSEMDOT(sum,xs,v,idx,n);
886         d    = fshift + A->a[diag[i]+shift];
887         n    = B->i[i+1] - B->i[i];
888         idx  = B->j + B->i[i] + shift;
889         v    = B->a + B->i[i] + shift;
890         SPARSEDENSEMDOT(sum,ls,v,idx,n);
891         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
892       }
893       /* come up through the rows */
894       for ( i=m-1; i>-1; i-- ) {
895         n    = A->i[i+1] - A->i[i];
896         idx  = A->j + A->i[i] + shift;
897         v    = A->a + A->i[i] + shift;
898         sum  = b[i];
899         SPARSEDENSEMDOT(sum,xs,v,idx,n);
900         d    = fshift + A->a[diag[i]+shift];
901         n    = B->i[i+1] - B->i[i];
902         idx  = B->j + B->i[i] + shift;
903         v    = B->a + B->i[i] + shift;
904         SPARSEDENSEMDOT(sum,ls,v,idx,n);
905         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
906       }
907     }
908   }
909   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
910     if (flag & SOR_ZERO_INITIAL_GUESS) {
911       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
912     }
913     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
914     CHKERRQ(ierr);
915     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
916     CHKERRQ(ierr);
917     while (its--) {
918       for ( i=0; i<m; i++ ) {
919         n    = A->i[i+1] - A->i[i];
920         idx  = A->j + A->i[i] + shift;
921         v    = A->a + A->i[i] + shift;
922         sum  = b[i];
923         SPARSEDENSEMDOT(sum,xs,v,idx,n);
924         d    = fshift + A->a[diag[i]+shift];
925         n    = B->i[i+1] - B->i[i];
926         idx  = B->j + B->i[i] + shift;
927         v    = B->a + B->i[i] + shift;
928         SPARSEDENSEMDOT(sum,ls,v,idx,n);
929         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
930       }
931     }
932   }
933   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
934     if (flag & SOR_ZERO_INITIAL_GUESS) {
935       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
936     }
937     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
938                             mat->Mvctx); CHKERRQ(ierr);
939     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
940                             mat->Mvctx); CHKERRQ(ierr);
941     while (its--) {
942       for ( i=m-1; i>-1; i-- ) {
943         n    = A->i[i+1] - A->i[i];
944         idx  = A->j + A->i[i] + shift;
945         v    = A->a + A->i[i] + shift;
946         sum  = b[i];
947         SPARSEDENSEMDOT(sum,xs,v,idx,n);
948         d    = fshift + A->a[diag[i]+shift];
949         n    = B->i[i+1] - B->i[i];
950         idx  = B->j + B->i[i] + shift;
951         v    = B->a + B->i[i] + shift;
952         SPARSEDENSEMDOT(sum,ls,v,idx,n);
953         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
954       }
955     }
956   }
957   return 0;
958 }
959 
960 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
961                              int *nzalloc,int *mem)
962 {
963   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
964   Mat        A = mat->A, B = mat->B;
965   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
966 
967   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
968   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
969   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
970   if (flag == MAT_LOCAL) {
971     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
972   } else if (flag == MAT_GLOBAL_MAX) {
973     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
974     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
975   } else if (flag == MAT_GLOBAL_SUM) {
976     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
977     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
978   }
979   return 0;
980 }
981 
982 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
983 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
984 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
985 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
986 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
987 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
988 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
989 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
990 
991 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
992 {
993   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
994 
995   if (op == NO_NEW_NONZERO_LOCATIONS ||
996       op == YES_NEW_NONZERO_LOCATIONS ||
997       op == COLUMNS_SORTED ||
998       op == ROW_ORIENTED) {
999         MatSetOption(a->A,op);
1000         MatSetOption(a->B,op);
1001   }
1002   else if (op == ROWS_SORTED ||
1003            op == SYMMETRIC_MATRIX ||
1004            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1005            op == YES_NEW_DIAGONALS)
1006     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1007   else if (op == COLUMN_ORIENTED)
1008     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
1009   else if (op == NO_NEW_DIAGONALS)
1010     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1011   else
1012     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1013   return 0;
1014 }
1015 
1016 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1017 {
1018   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1019   *m = mat->M; *n = mat->N;
1020   return 0;
1021 }
1022 
1023 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1024 {
1025   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1026   *m = mat->m; *n = mat->N;
1027   return 0;
1028 }
1029 
1030 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1031 {
1032   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1033   *m = mat->rstart; *n = mat->rend;
1034   return 0;
1035 }
1036 
1037 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1038 {
1039   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1040   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1041   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1042   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1043 
1044   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows")
1045   lrow = row - rstart;
1046 
1047   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1048   if (!v)   {pvA = 0; pvB = 0;}
1049   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1050   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1051   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1052   nztot = nzA + nzB;
1053 
1054   if (v  || idx) {
1055     if (nztot) {
1056       /* Sort by increasing column numbers, assuming A and B already sorted */
1057       int imark;
1058       if (mat->assembled) {
1059         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1060       }
1061       if (v) {
1062         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1063         for ( i=0; i<nzB; i++ ) {
1064           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1065           else break;
1066         }
1067         imark = i;
1068         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1069         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1070       }
1071       if (idx) {
1072         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1073         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1074         for ( i=0; i<nzB; i++ ) {
1075           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1076           else break;
1077         }
1078         imark = i;
1079         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1080         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1081       }
1082     }
1083     else {*idx = 0; *v=0;}
1084   }
1085   *nz = nztot;
1086   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1087   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1088   return 0;
1089 }
1090 
1091 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1092 {
1093   if (idx) PetscFree(*idx);
1094   if (v) PetscFree(*v);
1095   return 0;
1096 }
1097 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, one, or infinity norm of a
   parallel AIJ matrix.  The matrix must already be assembled.

   On a single processor the work is delegated to the sequential norm.
   Otherwise each processor accumulates contributions from its diagonal
   (A) and off-diagonal (B) blocks and an MPI_Allreduce combines them.
   The two norm is not supported.  shift accounts for 0- vs 1-based
   column indexing in the stored arrays (amat->indexshift).
*/
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    /* uniprocessor: the whole matrix lives in aij->A */
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* local sum of squared magnitudes over A then B, then global sum */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      /* tmp holds per-global-column absolute sums; reduced into tmp2.
         This uses O(N) storage per processor (N = global columns). */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* A block: local column + cstart gives the global column */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* B block: garray maps local columns to global columns */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processors, so each local row sum is
         complete once both A and B contributions are added */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}
1173 
1174 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1175 {
1176   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1177   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1178   int        ierr,shift = Aloc->indexshift;
1179   Mat        B;
1180   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1181   Scalar     *array;
1182 
1183   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1184   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1185   CHKERRQ(ierr);
1186 
1187   /* copy over the A part */
1188   Aloc = (Mat_SeqAIJ*) a->A->data;
1189   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1190   row = a->rstart;
1191   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1192   for ( i=0; i<m; i++ ) {
1193     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1194     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1195   }
1196   aj = Aloc->j;
1197   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1198 
1199   /* copy over the B part */
1200   Aloc = (Mat_SeqAIJ*) a->B->data;
1201   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1202   row = a->rstart;
1203   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1204   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1205   for ( i=0; i<m; i++ ) {
1206     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1207     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1208   }
1209   PetscFree(ct);
1210   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1211   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1212   if (matout) {
1213     *matout = B;
1214   } else {
1215     /* This isn't really an in-place transpose .... but free data structures from a */
1216     PetscFree(a->rowners);
1217     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1218     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1219     if (a->colmap) PetscFree(a->colmap);
1220     if (a->garray) PetscFree(a->garray);
1221     if (a->lvec) VecDestroy(a->lvec);
1222     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1223     PetscFree(a);
1224     PetscMemcpy(A,B,sizeof(struct _Mat));
1225     PetscHeaderDestroy(B);
1226   }
1227   return 0;
1228 }
1229 
1230 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1231 static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);
1232 
/* -------------------------------------------------------------------*/
/*
   Function dispatch table for the MPIAIJ matrix type.  The slot order
   must match struct _MatOps (declared in the matrix headers -- do not
   reorder entries here); a 0 entry marks an operation this type does
   not implement.
*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1253 
1254 /*@C
1255    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1256    (the default parallel PETSc format).
1257 
1258    Input Parameters:
1259 .  comm - MPI communicator
1260 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1261 .  n - number of local columns (or PETSC_DECIDE to have calculated
1262            if N is given)
1263 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1264 .  N - number of global columns (or PETSC_DECIDE to have calculated
1265            if n is given)
1266 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1267            (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1269            or null (possibly different for each row).  You must leave room
1270            for the diagonal entry even if it is zero.
1271 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1272            submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
1274            submatrix or null (possibly different for each row).
1275 
1276    Output Parameter:
1277 .  newmat - the matrix
1278 
1279    Notes:
1280    The AIJ format (also called the Yale sparse matrix format or
1281    compressed row storage), is fully compatible with standard Fortran 77
1282    storage.  That is, the stored row and column indices can begin at
1283    either one (as in Fortran) or zero.  See the users manual for details.
1284 
1285    The user MUST specify either the local or global matrix dimensions
1286    (possibly both).
1287 
1288    Storage Information:
1289    For a square global matrix we define each processor's diagonal portion
1290    to be its local rows and the corresponding columns (a square submatrix);
1291    each processor's off-diagonal portion encompasses the remainder of the
1292    local matrix (a rectangular submatrix).
1293 
1294    The user can specify preallocated storage for the diagonal part of
1295    the local submatrix with either d_nz or d_nnz (not both). Set both
1296    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1297    Likewise, specify preallocated storage for the off-diagonal part of
1298    the local submatrix with o_nz or o_nnz (not both).
1299 
1300 .keywords: matrix, aij, compressed row, sparse, parallel
1301 
1302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1303 @*/
/* Implementation of MatCreateMPIAIJ(); user-level argument documentation
   is in the manual page above.  This is collective over comm: all
   processors must call it (it performs MPI_Allreduce/MPI_Allgather). */
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *a;
  int          ierr, i,sum[2],work[2];

  *newmat         = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  /* per-row preallocation arrays only make sense if the caller fixed
     the local row count */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  /* a global dimension left to PETSc is the sum of the local ones */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* a local dimension left to PETSc gets a near-even share of the
     global one (low ranks take the remainder) */
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  /* rowners and cowners share a single allocation; after the gather and
     prefix sum, entry [p] is the first row (column) owned by processor
     p and entry [size] is the global total */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  /* A holds the m x n diagonal block; B holds the off-diagonal entries
     and is created with the full global column width N */
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap    = 0;
  a->garray    = 0;

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
1377 
/*
   MatCopyPrivate_MPIAIJ - Duplicates an assembled MPIAIJ matrix: the
   header, ownership table, column maps, local A and B blocks, local
   work vector and scatter context.  The cpvalues argument is not
   consulted here; values come along via MatConvert(...,MATSAME,...).
*/
static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy  = MatDestroy_MPIAIJ;
  mat->view     = MatView_MPIAIJ;
  mat->factor   = matin->factor;

  /* scalar fields copied directly from the source matrix */
  a->m          = oldmat->m;
  a->n          = oldmat->n;
  a->M          = oldmat->M;
  a->N          = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  /* NOTE(review): only the rowners table (size+1 entries) is copied;
     MatCreateMPIAIJ() also builds a cowners table in the same
     allocation, which is not reproduced here -- confirm nothing reads
     a->cowners on a copied matrix */
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* colmap (global->local column map) is copied only if it was built */
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  /* garray length equals the compacted column count of the B block */
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}
1434 
1435 #include "sysio.h"
1436 
/*
   MatLoad_MPIAIJorMPIRow - Loads a matrix in PETSc binary format from a
   binary viewer and distributes it over the viewer's communicator as a
   MATMPIAIJ or MATMPIROW matrix.

   Protocol: processor 0 owns the file descriptor and does all the
   reading.  It reads the header, scatters the row lengths, then for the
   column indices (and later the values) reads each processor's chunk in
   turn and ships it with MPI_Send; the other processors post matching
   MPI_Recv calls.  Collective over the viewer's communicator.
*/
int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only processor 0 touches the file; it validates the cookie */
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  /* global dimensions come from header[1], header[2] */
  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* offlens shares the ourlens allocation (second half) */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    /* maxnz reused here as the actual received count, as a sanity check */
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* ourlens temporarily holds only the diagonal-block lengths while the
     matrix is created with per-row preallocation; restored below */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
1607