#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.91 1995/10/19 22:22:37 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way, since the
length of colmap equals the global number of columns.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscZero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}
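
/*
   A minimal usage sketch (mirroring MatSetValues_MPIAIJ below, not a
   separate implementation): colmap inverts garray, taking a global
   column number to its local position in the off-diagonal block B.
   Here global_col is a hypothetical global column index owned by
   another processor:

       if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
       col = aij->colmap[global_col] + shift;
       if (col < 0) ... global_col has no slot in B yet ...
*/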

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

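/*
   MatSetValues for the MPIAIJ format routes each entry to one of three
   destinations: the diagonal block A (row and column both owned locally),
   the off-diagonal block B (row owned locally, column owned elsewhere),
   or the stash (row owned by another processor; shipped off at assembly
   time).  Schematically, for a global index pair (row,col):

       if (rstart <= row && row < rend) {
         if (cstart <= col && col < cend)  insert into A;
         else                              insert into B;
       }
       else                                stash until MatAssemblyBegin/End;
*/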
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are using the same mode (INSERT_VALUES or ADD_VALUES) */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index, global column index, value); we store the global
     indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
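  /*
     Layout of one stashed entry in svalues/rvalues, matching the packing
     loop below (three Scalars per entry, so a message carrying nprocs[i]
     entries has length 3*nprocs[i]):

         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] = numerical value
  */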
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

263 
264 /* the code does not do the diagonal entries correctly unless the
265    matrix is square and the column and row owerships are identical.
266    This is a BUG. The only way to fix it seems to be to access
267    aij->A and aij->B directly and not through the MatZeroRows()
268    routine.
269 */
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:   */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

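/*
   Parallel matrix-vector product: with each processor's rows split into
   the diagonal block A and the off-diagonal block B, the product is

       y_local = A * x_local + B * x_ghost

   where x_ghost (a->lvec) holds the off-processor entries of x gathered
   by the scatter context a->Mvctx; the communication is overlapped with
   the local product below.
*/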
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

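/*
   Transpose products: y = A^T x is assembled from the two blocks as

       y  = A_d^T * x_local              (diagonal block, purely local)
       y += scatter-add of B^T * x_local (reverse scatter into the owners)

   i.e. each processor computes its off-diagonal contribution B^T x into
   lvec, and the reverse scatter adds those pieces into the owning
   processor's portion of y.
*/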
static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size, rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
    This has to provide several versions:

     1) per the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors
*/
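/*
   The basic relaxation update applied in all the sweeps below is, per
   row i (with SPARSEDENSEMDOT accumulating b_i minus the row's dot
   product against the current iterate),

       x_i <- (1-omega)*x_i + omega*( x_i + (b_i - sum_j a_ij x_j)/d_i )

   where d_i = fshift + a_ii; the diagonal-block terms use the local x
   values (xs) and the off-diagonal-block terms use the ghost values (ls).
*/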
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let  A = L + U + D, where L is lower triangular,
    U is upper triangular, and D is diagonal. This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of the SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
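    /*
       Why this works: write M = L + E and N = U + E, so that
       A = M + N + (D - 2E).  Then

           M^{-1} A N^{-1} = N^{-1} + M^{-1} + M^{-1} (D - 2E) N^{-1}

       and below x = N^{-1} b, t = M^{-1} (b - (2E - D) x), with the
       final answer x + t; note 2E - D = (2/omega - 1) D = scale*D.
    */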
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                                                 mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                                                    mat->Mvctx); CHKERRQ(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                            mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                            mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}
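
/*
   Usage sketch: the ownership range is the half-open interval of global
   rows stored locally, so a loop over the locally owned rows of a
   (hypothetical) matrix mat looks like

       int rstart, rend, row;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) { ... work on global row ... }
*/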

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v) PETSCFREE(*v);
  return 0;
}
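
/*
   Typical use of the row routines above, on a locally owned global row
   of a (hypothetical) assembled matrix mat:

       int    nz, *cols;
       Scalar *vals;
       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... nz entries: global columns cols[i], values vals[i] ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
*/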
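
/*
   The norms computed below, for the global matrix whose local rows are
   stored as the pair [A B]:

       Frobenius: ||M||_F   = sqrt( sum_ij |m_ij|^2 )  (Allreduce the local sums)
       1-norm:    ||M||_1   = max_j sum_i |m_ij|       (max column sum)
       infinity:  ||M||_inf = max_i sum_j |m_ij|       (max row sum)
*/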
static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscZero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[cstart + *jj++ + shift] += abs(*v++);
#else
        tmp[cstart + *jj++ + shift] += fabs(*v++);
#endif
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[garray[*jj++ + shift]] += abs(*v++);
#else
        tmp[garray[*jj++ + shift]] += fabs(*v++);
#endif
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PETSCFREE(tmp); PETSCFREE(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double normtemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        if (sum > normtemp) normtemp = sum;
      }
      MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data structures from a */
    PETSCFREE(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PETSCFREE(a->colmap);
    if (a->garray) PETSCFREE(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
    PETSCFREE(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PETSCHEADERDESTROY(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
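/*
   A minimal calling sketch (parameter values illustrative only): create
   a 100x100 matrix with 5 preallocated nonzeros per row in the diagonal
   block and 2 in the off-diagonal block, letting PETSc choose the row
   distribution:

       Mat A;
       int ierr;
       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,5,0,2,0,&A); CHKERRQ(ierr);
*/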
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *a;
  int          ierr, i,sum[2],work[2];

  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap    = 0;
  a->garray    = 0;

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
  *newmat       = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data     = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy  = MatDestroy_MPIAIJ;
  mat->view     = MatView_MPIAIJ;
  mat->factor   = matin->factor;

  a->m          = oldmat->m;
  a->n          = oldmat->n;
  a->M          = oldmat->M;
  a->N          = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

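/*
   Binary file layout assumed by the loader below (inferred from the
   reads it performs): a four-int header whose first three entries are
   MAT_COOKIE, M and N (the fourth is read but not used here), then the
   M row lengths, then all column indices ordered by row, then all
   numerical values in the same order.  Processor 0 reads the file and
   ships each processor its block of rows.
*/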
int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PETSCFREE(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscZero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PETSCFREE(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PETSCMAX(maxnz,procsnz[i]);
    }
    cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PETSCFREE(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscZero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PETSCFREE(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}