#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.93 1995/10/23 17:21:45 bsmith Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way, since the
length of colmap equals the number of columns in the global matrix.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscZero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}
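
/* Explanatory note (added): colmap[] is zero-initialized, and each
   off-diagonal global column g = garray[i] is recorded as i-shift, so
   colmap[g]+shift recovers the local index i when g is present; for an
   absent column the lookup yields shift (negative with the one-based
   indexshift of -1), which is why callers below test col < 0 to detect
   missing entries. */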

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
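  /* Explanatory note (added): procs[] is a 0/1 indicator of "this process
     sends to j", so the SUM reduction leaves in work[rank] the number of
     messages this process will receive; nprocs[] holds per-destination
     counts, so the MAX reduction bounds the longest incoming message. */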
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index,global column index,value); the indices are
     stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
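  /* Packing layout used below (explanatory sketch, added): all triples
     destined for the same process are stored contiguously,

        svalues: | row col val | row col val | ...
                 |<--- to process p --->|<--- to process q --->| ...

     with 3*starts[p] giving the offset of the first triple for p. */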
  svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
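  /* Explanatory note (added): "assembled" is 0 or 1 on each processor,
     so the MPI_PROD reduction yields 1 only if every processor is still
     assembled; a single disassembled processor forces the product to 0. */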
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:   */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
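  /* Explanatory note (added): this computes yy = A_diag*x_local +
     B_offdiag*x_ghost; the scatter gathering the ghost values of xx into
     a->lvec is begun first so the communication can overlap the multiply
     by the local diagonal block. */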
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO) {
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
        fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
                nzalloc,mem);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);


      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
    This has to provide several versions:

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let  A = L + U + D, where L is lower triangular,
    U is upper triangular, and D is diagonal; this routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of the SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
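    /* Derivation (added for exposition): since A = L + D + U and
       E = D/omega,
           A (U+E)^{-1} = (L+E)(U+E)^{-1} + I + (D-2E)(U+E)^{-1},
       so with  x = (U+E)^{-1} b  and  t = b - (2E-D) x,
           (L+E)^{-1} A (U+E)^{-1} b = x + (L+E)^{-1} t,
       which is exactly the sequence computed below:
       x = (E+U)^{-1} b;  t = b - (2E-D) x;  t = (E+L)^{-1} t;  x = x + t. */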
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                                                 mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                                                    mat->Mvctx); CHKERRQ(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
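      /* Explanatory note (added; it assumes SPARSEDENSEMDOT subtracts the
         row inner product from sum, which is what its use here requires):
         after the two MDOTs, sum = b[i] - (A*x)_i over the full row, so
         sum/d + x[i] is the Gauss-Seidel update x[i] + (b - A*x)_i/a_ii,
         relaxed here by omega. */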
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                            mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                            mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;
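  /* Explanatory note (added): the merge below relies on both row parts
     being sorted; globally the B columns that fall below cstart come
     first, then all A columns (shifted to global numbering by cstart),
     then the remaining B columns, with imark marking where the B
     columns cross cstart. */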

  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v) PETSCFREE(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
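      /* Note (added): as with colmap in CreateColmap_Private() above,
         these temporaries have global length aij->N, so the 1-norm is
         computed in a non-scalable way. */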
      tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscZero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[cstart + *jj++ + shift] += abs(*v++);
#else
        tmp[cstart + *jj++ + shift] += fabs(*v++);
#endif
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[garray[*jj++ + shift]] += abs(*v++);
#else
        tmp[garray[*jj++ + shift]] += fabs(*v++);
#endif
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PETSCFREE(tmp); PETSCFREE(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
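  /* Explanatory note (added): the local column indices of the diagonal
     block are shifted to global numbering in place so that MatSetValues()
     can be called directly; the shift is undone right after the loop. */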
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This is not really an in-place transpose: we free the data structures of a and take over those of B */
    PETSCFREE(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PETSCFREE(a->colmap);
    if (a->garray) PETSCFREE(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PETSCFREE(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PETSCHEADERDESTROY(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
           if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

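   Example:
   A minimal usage sketch (the dimensions M,N and the preallocation
   figures 5 and 2 below are illustrative only):
$     Mat A;
$     ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                            M,N,5,0,2,0,&A); CHKERRQ(ierr);
$     /* insert entries with MatSetValues(), then */
$     ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
$     ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
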
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *a;
  int          ierr, i,sum[2],work[2];

  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
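  /* Explanatory note (added): with PETSC_DECIDE each process gets
     floor(M/size) rows, and the first M % size processes get one extra
     row; the columns are split the same way. */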
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off-processor entries formed during MatSetValues() */
1375   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1376   a->colmap    = 0;
1377   a->garray    = 0;
1378 
1379   /* stuff used for matrix vector multiply */
1380   a->lvec      = 0;
1381   a->Mvctx     = 0;
1382   a->assembled = 0;
1383 
1384   *newmat = mat;
1385   return 0;
1386 }
1387 
1388 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1389 {
1390   Mat        mat;
1391   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1392   int        ierr, len;
1393 
1394   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1395   *newmat       = 0;
1396   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1397   PLogObjectCreate(mat);
1398   mat->data     = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1399   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1400   mat->destroy  = MatDestroy_MPIAIJ;
1401   mat->view     = MatView_MPIAIJ;
1402   mat->factor   = matin->factor;
1403 
1404   a->m          = oldmat->m;
1405   a->n          = oldmat->n;
1406   a->M          = oldmat->M;
1407   a->N          = oldmat->N;
1408 
1409   a->assembled  = 1;
1410   a->rstart     = oldmat->rstart;
1411   a->rend       = oldmat->rend;
1412   a->cstart     = oldmat->cstart;
1413   a->cend       = oldmat->cend;
1414   a->size       = oldmat->size;
1415   a->rank       = oldmat->rank;
1416   a->insertmode = NOT_SET_VALUES;
1417 
1418   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1419   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1420   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1421   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1422   if (oldmat->colmap) {
1423     a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1424     PLogObjectMemory(mat,(a->N)*sizeof(int));
1425     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1426   } else a->colmap = 0;
1427   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1428     a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray);
1429     PLogObjectMemory(mat,len*sizeof(int));
1430     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1431   } else a->garray = 0;
1432 
1433   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1434   PLogObjectParent(mat,a->lvec);
1435   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1436   PLogObjectParent(mat,a->Mvctx);
1437   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1438   PLogObjectParent(mat,a->A);
1439   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1440   PLogObjectParent(mat,a->B);
1441   *newmat = mat;
1442   return 0;
1443 }
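/*
   Note (hedged): this private copy appears to back the MATSAME duplication
   path, the same mechanism this routine itself uses to copy the sequential
   blocks A and B.  A caller-level sketch:

     Mat B;
     ierr = MatConvert(A,MATSAME,&B); CHKERRQ(ierr);

   which duplicates an assembled matrix, copying its parallel layout and
   (depending on cpvalues) its numerical values.
*/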
1444 
1445 #include "sysio.h"
1446 
1447 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1448 {
1449   Mat          A;
1450   int          i, nz, ierr, j,rstart, rend, fd;
1451   Scalar       *vals,*svals;
1452   PetscObject  vobj = (PetscObject) bview;
1453   MPI_Comm     comm = vobj->comm;
1454   MPI_Status   status;
1455   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1456   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1457   int          tag = ((PetscObject)bview)->tag;
1458 
1459   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1460   if (!rank) {
1461     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1462     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1463     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not a matrix object");
1464   }
1465 
1466   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1467   M = header[1]; N = header[2];
1468   /* determine ownership of all rows */
1469   m = M/size + ((M % size) > rank);
1470   rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
1471   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1472   rowners[0] = 0;
1473   for ( i=2; i<=size; i++ ) {
1474     rowners[i] += rowners[i-1];
1475   }
1476   rstart = rowners[rank];
1477   rend   = rowners[rank+1];
1478 
1479   /* distribute row lengths to all processors */
1480   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1481   offlens = ourlens + (rend-rstart);
1482   if (!rank) {
1483     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
1484     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1485     sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
1486     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1487     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1488     PETSCFREE(sndcounts);
1489   }
1490   else {
1491     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1492   }
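  /* Illustrative Scatterv bookkeeping: with rowners = {0,4,7,10} the send
     counts are {4,3,3} and the displacements are rowners[0..size-1] =
     {0,4,7}, so each processor receives exactly its rend-rstart row
     lengths from the root. */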
1493 
1494   if (!rank) {
1495     /* calculate the number of nonzeros on each processor */
1496     procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
1497     PetscZero(procsnz,size*sizeof(int));
1498     for ( i=0; i<size; i++ ) {
1499       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1500         procsnz[i] += rowlengths[j];
1501       }
1502     }
1503     PETSCFREE(rowlengths);
1504 
1505     /* determine max buffer needed and allocate it */
1506     maxnz = 0;
1507     for ( i=0; i<size; i++ ) {
1508       maxnz = PETSCMAX(maxnz,procsnz[i]);
1509     }
1510     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
1511 
1512     /* read in my part of the matrix column indices */
1513     nz = procsnz[0];
1514     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1515     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1516 
1517     /* read in everyone else's column indices and ship them off */
1518     for ( i=1; i<size; i++ ) {
1519       nz = procsnz[i];
1520       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1521       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1522     }
1523     PETSCFREE(cols);
1524   }
1525   else {
1526     /* determine buffer space needed for message */
1527     nz = 0;
1528     for ( i=0; i<m; i++ ) {
1529       nz += ourlens[i];
1530     }
1531     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1532 
1533     /* receive message of column indices */
1534     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1535     MPI_Get_count(&status,MPI_INT,&maxnz);
1536     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:received wrong number of column indices; file likely corrupt");
1537   }
1538 
1539   /* loop over local rows, determining number of off-diagonal entries */
1540   PetscZero(offlens,m*sizeof(int));
1541   jj = 0;
1542   for ( i=0; i<m; i++ ) {
1543     for ( j=0; j<ourlens[i]; j++ ) {
1544       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1545       jj++;
1546     }
1547   }
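  /* Illustrative count: if this processor owns rows [4,7) and its first
     local row has column indices {1,5,9}, columns 1 and 9 fall outside
     [rstart,rend) and offlens[0] = 2, leaving one diagonal-block entry.
     (Row ownership doubles as column ownership here, which matches the
     n = PETSC_DECIDE split used below.) */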
1548 
1549   /* create our matrix: first reduce ourlens to diagonal-block lengths for preallocation */
1550   for ( i=0; i<m; i++ ) {
1551     ourlens[i] -= offlens[i];
1552   }
1553   if (type == MATMPIAIJ) {
1554     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1555   }
1556   else if (type == MATMPIROW) {
1557     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1558   }
       else SETERRQ(1,"MatLoad_MPIAIJorMPIRow:unsupported matrix type");
1559   A = *newmat;
1560   MatSetOption(A,COLUMNS_SORTED);
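  /* restore the full row lengths; the MatSetValues loops below insert
     entire rows, diagonal and off-diagonal entries together */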
1561   for ( i=0; i<m; i++ ) {
1562     ourlens[i] += offlens[i];
1563   }
1564 
1565   if (!rank) {
1566     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1567 
1568     /* read in my part of the matrix numerical values  */
1569     nz = procsnz[0];
1570     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1571 
1572     /* insert into matrix */
1573     jj      = rstart;
1574     smycols = mycols;
1575     svals   = vals;
1576     for ( i=0; i<m; i++ ) {
1577       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1578       smycols += ourlens[i];
1579       svals   += ourlens[i];
1580       jj++;
1581     }
1582 
1583     /* read in other processors and ship out */
1584     for ( i=1; i<size; i++ ) {
1585       nz = procsnz[i];
1586       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1587       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1588     }
1589     PETSCFREE(procsnz);
1590   }
1591   else {
1592     /* receive numeric values */
1593     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1594 
1595     /* receive message of values */
1596     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1597     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1598     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:received wrong number of values; file likely corrupt");
1599 
1600     /* insert into matrix */
1601     jj      = rstart;
1602     smycols = mycols;
1603     svals   = vals;
1604     for ( i=0; i<m; i++ ) {
1605       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1606       smycols += ourlens[i];
1607       svals   += ourlens[i];
1608       jj++;
1609     }
1610   }
1611   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
1612 
1613   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1614   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1615   return 0;
1616 }
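/*
   Usage sketch (illustrative; ViewerFileOpenBinary() and BINARY_RDONLY are
   assumptions about the binary-viewer API and may differ in this release):

     Viewer bview;
     Mat    A;
     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                 &bview); CHKERRQ(ierr);
     ierr = MatLoad(bview,MATMPIAIJ,&A); CHKERRQ(ierr);
*/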
1617