xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 551bd6ce4be148d2b75b86aa359d9bec349b3a85)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.121 1996/02/03 04:20:03 bsmith Exp curfman $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int        ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
43   Scalar     value;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift,roworiented = aij->roworiented;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (im[i] >= rstart && im[i] < rend) {
56       row = im[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (in[j] >= cstart && in[j] < cend){
61           col = in[j] - cstart;
62           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
63           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
64         }
65         else {
66           if (mat->was_assembled) {
67             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
68             col = aij->colmap[in[j]] + shift;
69             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
70               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
71               col =  in[j];
72             }
73           }
74           else col = in[j];
75           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
76           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
77         }
78       }
79     }
80     else {
81       if (roworiented) {
82         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
83       }
84       else {
85         row = im[i];
86         for ( j=0; j<n; j++ ) {
87           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
88         }
89       }
90     }
91   }
92   return 0;
93 }
94 
95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96 {
97   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100   int        cstart = aij->cstart, cend = aij->cend,row,col;
101   int        shift = C->indexshift;
102 
103   for ( i=0; i<m; i++ ) {
104     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106     if (idxm[i] >= rstart && idxm[i] < rend) {
107       row = idxm[i] - rstart;
108       for ( j=0; j<n; j++ ) {
109         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111         if (idxn[j] >= cstart && idxn[j] < cend){
112           col = idxn[j] - cstart;
113           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114         }
115         else {
116           col = aij->colmap[idxn[j]] + shift;
117           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118         }
119       }
120     }
121     else {
122       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123     }
124   }
125   return 0;
126 }
127 
128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
129 {
130   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131   MPI_Comm    comm = mat->comm;
132   int         size = aij->size, *owners = aij->rowners;
133   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
134   MPI_Request *send_waits,*recv_waits;
135   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
136   InsertMode  addv;
137   Scalar      *rvalues,*svalues;
138 
139   /* make sure all processors are either in INSERTMODE or ADDMODE */
140   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
143   }
144   aij->insertmode = addv; /* in case this processor had no cache */
145 
146   /*  first count number of contributors to each processor */
147   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
149   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
150   for ( i=0; i<aij->stash.n; i++ ) {
151     idx = aij->stash.idx[i];
152     for ( j=0; j<size; j++ ) {
153       if (idx >= owners[j] && idx < owners[j+1]) {
154         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
155       }
156     }
157   }
158   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
159 
160   /* inform other processors of number of messages and max length*/
161   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
162   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
163   nreceives = work[rank];
164   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
165   nmax = work[rank];
166   PetscFree(work);
167 
168   /* post receives:
169        1) each message will consist of ordered pairs
170      (global index,value) we store the global index as a double
171      to simplify the message passing.
172        2) since we don't know how long each individual message is we
173      allocate the largest needed buffer for each receive. Potentially
174      this is a lot of wasted space.
175 
176 
177        This could be done better.
178   */
179   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
180   CHKPTRQ(rvalues);
181   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
182   CHKPTRQ(recv_waits);
183   for ( i=0; i<nreceives; i++ ) {
184     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
185               comm,recv_waits+i);
186   }
187 
188   /* do sends:
189       1) starts[i] gives the starting index in svalues for stuff going to
190          the ith processor
191   */
192   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
193   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
194   CHKPTRQ(send_waits);
195   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
196   starts[0] = 0;
197   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
198   for ( i=0; i<aij->stash.n; i++ ) {
199     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
200     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
201     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
202   }
203   PetscFree(owner);
204   starts[0] = 0;
205   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
206   count = 0;
207   for ( i=0; i<size; i++ ) {
208     if (procs[i]) {
209       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
210                 comm,send_waits+count++);
211     }
212   }
213   PetscFree(starts); PetscFree(nprocs);
214 
215   /* Free cache space */
216   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
217   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
218 
219   aij->svalues    = svalues;    aij->rvalues    = rvalues;
220   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
221   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
222   aij->rmax       = nmax;
223 
224   return 0;
225 }
226 extern int MatSetUpMultiply_MPIAIJ(Mat);
227 
228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
229 {
230   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
232   MPI_Status  *send_status,recv_status;
233   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234   int         row,col,other_disassembled,shift = C->indexshift;
235   Scalar      *values,val;
236   InsertMode  addv = aij->insertmode;
237 
238   /*  wait on receives */
239   while (count) {
240     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
241     /* unpack receives into our local space */
242     values = aij->rvalues + 3*imdex*aij->rmax;
243     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
244     n = n/3;
245     for ( i=0; i<n; i++ ) {
246       row = (int) PetscReal(values[3*i]) - aij->rstart;
247       col = (int) PetscReal(values[3*i+1]);
248       val = values[3*i+2];
249       if (col >= aij->cstart && col < aij->cend) {
250         col -= aij->cstart;
251         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
252       }
253       else {
254         if (mat->was_assembled) {
255           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256           col = aij->colmap[col] + shift;
257           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
258             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259             col = (int) PetscReal(values[3*i+1]);
260           }
261         }
262         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
263       }
264     }
265     count--;
266   }
267   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
268 
269   /* wait on sends */
270   if (aij->nsends) {
271     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
272     CHKPTRQ(send_status);
273     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
274     PetscFree(send_status);
275   }
276   PetscFree(aij->send_waits); PetscFree(aij->svalues);
277 
278   aij->insertmode = NOT_SET_VALUES;
279   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
280   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
281 
282   /* determine if any processor has disassembled, if so we must
283      also disassemble ourselfs, in order that we may reassemble. */
284   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285   if (mat->was_assembled && !other_disassembled) {
286     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
287   }
288 
289   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
290     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
291   }
292   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
293   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
294 
295   return 0;
296 }
297 
298 static int MatZeroEntries_MPIAIJ(Mat A)
299 {
300   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
301   int        ierr;
302   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
303   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
304   return 0;
305 }
306 
307 /* the code does not do the diagonal entries correctly unless the
308    matrix is square and the column and row owerships are identical.
309    This is a BUG. The only way to fix it seems to be to access
310    aij->A and aij->B directly and not through the MatZeroRows()
311    routine.
312 */
313 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
314 {
315   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
316   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
317   int            *procs,*nprocs,j,found,idx,nsends,*work;
318   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
319   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
320   int            *lens,imdex,*lrows,*values;
321   MPI_Comm       comm = A->comm;
322   MPI_Request    *send_waits,*recv_waits;
323   MPI_Status     recv_status,*send_status;
324   IS             istmp;
325 
326   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
327   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
328 
329   /*  first count number of contributors to each processor */
330   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
331   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
332   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
333   for ( i=0; i<N; i++ ) {
334     idx = rows[i];
335     found = 0;
336     for ( j=0; j<size; j++ ) {
337       if (idx >= owners[j] && idx < owners[j+1]) {
338         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
339       }
340     }
341     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
342   }
343   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
344 
345   /* inform other processors of number of messages and max length*/
346   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
347   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
348   nrecvs = work[rank];
349   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
350   nmax = work[rank];
351   PetscFree(work);
352 
353   /* post receives:   */
354   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
355   CHKPTRQ(rvalues);
356   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
357   CHKPTRQ(recv_waits);
358   for ( i=0; i<nrecvs; i++ ) {
359     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
360   }
361 
362   /* do sends:
363       1) starts[i] gives the starting index in svalues for stuff going to
364          the ith processor
365   */
366   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
367   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
368   CHKPTRQ(send_waits);
369   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
370   starts[0] = 0;
371   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
372   for ( i=0; i<N; i++ ) {
373     svalues[starts[owner[i]]++] = rows[i];
374   }
375   ISRestoreIndices(is,&rows);
376 
377   starts[0] = 0;
378   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   count = 0;
380   for ( i=0; i<size; i++ ) {
381     if (procs[i]) {
382       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
383     }
384   }
385   PetscFree(starts);
386 
387   base = owners[rank];
388 
389   /*  wait on receives */
390   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
391   source = lens + nrecvs;
392   count  = nrecvs; slen = 0;
393   while (count) {
394     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
395     /* unpack receives into our local space */
396     MPI_Get_count(&recv_status,MPI_INT,&n);
397     source[imdex]  = recv_status.MPI_SOURCE;
398     lens[imdex]  = n;
399     slen += n;
400     count--;
401   }
402   PetscFree(recv_waits);
403 
404   /* move the data into the send scatter */
405   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
406   count = 0;
407   for ( i=0; i<nrecvs; i++ ) {
408     values = rvalues + i*nmax;
409     for ( j=0; j<lens[i]; j++ ) {
410       lrows[count++] = values[j] - base;
411     }
412   }
413   PetscFree(rvalues); PetscFree(lens);
414   PetscFree(owner); PetscFree(nprocs);
415 
416   /* actually zap the local rows */
417   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
418   PLogObjectParent(A,istmp);
419   PetscFree(lrows);
420   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
421   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
422   ierr = ISDestroy(istmp); CHKERRQ(ierr);
423 
424   /* wait on sends */
425   if (nsends) {
426     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
427     CHKPTRQ(send_status);
428     MPI_Waitall(nsends,send_waits,send_status);
429     PetscFree(send_status);
430   }
431   PetscFree(send_waits); PetscFree(svalues);
432 
433   return 0;
434 }
435 
436 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
437 {
438   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
439   int        ierr;
440 
441   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
442   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
443   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
444   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
445   return 0;
446 }
447 
448 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
449 {
450   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
451   int        ierr;
452   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
453   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
454   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
455   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
456   return 0;
457 }
458 
459 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
460 {
461   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
462   int        ierr;
463 
464   /* do nondiagonal part */
465   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
466   /* send it on its way */
467   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
468                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
469   /* do local part */
470   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
471   /* receive remote parts: note this assumes the values are not actually */
472   /* inserted in yy until the next line, which is true for my implementation*/
473   /* but is not perhaps always true. */
474   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
475                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
476   return 0;
477 }
478 
479 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
480 {
481   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
482   int        ierr;
483 
484   /* do nondiagonal part */
485   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
486   /* send it on its way */
487   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
488                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
489   /* do local part */
490   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
491   /* receive remote parts: note this assumes the values are not actually */
492   /* inserted in yy until the next line, which is true for my implementation*/
493   /* but is not perhaps always true. */
494   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
495                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
496   return 0;
497 }
498 
499 /*
500   This only works correctly for square matrices where the subblock A->A is the
501    diagonal block
502 */
503 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
504 {
505   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
506   return MatGetDiagonal(a->A,v);
507 }
508 
509 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
510 {
511   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512   int        ierr;
513   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
514   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
515   return 0;
516 }
517 
518 static int MatDestroy_MPIAIJ(PetscObject obj)
519 {
520   Mat        mat = (Mat) obj;
521   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
522   int        ierr;
523 #if defined(PETSC_LOG)
524   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
525 #endif
526   PetscFree(aij->rowners);
527   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
528   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
529   if (aij->colmap) PetscFree(aij->colmap);
530   if (aij->garray) PetscFree(aij->garray);
531   if (aij->lvec)   VecDestroy(aij->lvec);
532   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
533   PetscFree(aij);
534   PLogObjectDestroy(mat);
535   PetscHeaderDestroy(mat);
536   return 0;
537 }
538 #include "draw.h"
539 #include "pinclude/pviewer.h"
540 
541 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
542 {
543   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
544   int         ierr;
545 
546   if (aij->size == 1) {
547     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
548   }
549   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
550   return 0;
551 }
552 
553 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
554 {
555   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
556   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
557   int         ierr, format,shift = C->indexshift,rank;
558   PetscObject vobj = (PetscObject) viewer;
559   FILE        *fd;
560 
561   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
562     ierr = ViewerFileGetFormat_Private(viewer,&format);
563     if (format == FILE_FORMAT_INFO_DETAILED) {
564       int nz,nzalloc,mem;
565       MPI_Comm_rank(mat->comm,&rank);
566       ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
567       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
568       MPIU_Seq_begin(mat->comm,1);
569       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
570                 nzalloc,mem);
571       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
572       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
573       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
574       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
575       fflush(fd);
576       MPIU_Seq_end(mat->comm,1);
577       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
578       return 0;
579     }
580     else if (format == FILE_FORMAT_INFO) {
581       return 0;
582     }
583   }
584 
585   if (vobj->type == ASCII_FILE_VIEWER) {
586     ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
587     MPIU_Seq_begin(mat->comm,1);
588     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
589            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
590            aij->cend);
591     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
592     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
593     fflush(fd);
594     MPIU_Seq_end(mat->comm,1);
595   }
596   else {
597     int size = aij->size;
598     rank = aij->rank;
599     if (size == 1) {
600       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
601     }
602     else {
603       /* assemble the entire matrix onto first processor. */
604       Mat         A;
605       Mat_SeqAIJ *Aloc;
606       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
607       Scalar      *a;
608 
609       if (!rank) {
610         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
611                CHKERRQ(ierr);
612       }
613       else {
614         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
615                CHKERRQ(ierr);
616       }
617       PLogObjectParent(mat,A);
618 
619       /* copy over the A part */
620       Aloc = (Mat_SeqAIJ*) aij->A->data;
621       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
622       row = aij->rstart;
623       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
624       for ( i=0; i<m; i++ ) {
625         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
626         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
627       }
628       aj = Aloc->j;
629       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
630 
631       /* copy over the B part */
632       Aloc = (Mat_SeqAIJ*) aij->B->data;
633       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
634       row = aij->rstart;
635       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
636       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
637       for ( i=0; i<m; i++ ) {
638         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
639         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
640       }
641       PetscFree(ct);
642       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
643       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
644       if (!rank) {
645         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
646       }
647       ierr = MatDestroy(A); CHKERRQ(ierr);
648     }
649   }
650   return 0;
651 }
652 
653 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
654 {
655   Mat         mat = (Mat) obj;
656   int         ierr;
657   PetscObject vobj = (PetscObject) viewer;
658 
659   if (!viewer) {
660     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
661   }
662   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
663   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
664     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
665   }
666   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
667     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
668   }
669   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
670     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
671   }
672   else if (vobj->cookie == DRAW_COOKIE) {
673     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
674   }
675   else if (vobj->type == BINARY_FILE_VIEWER) {
676     return MatView_MPIAIJ_Binary(mat,viewer);
677   }
678   return 0;
679 }
680 
681 extern int MatMarkDiag_SeqAIJ(Mat);
682 /*
683     This has to provide several versions.
684 
685      1) per sequential
686      2) a) use only local smoothing updating outer values only once.
687         b) local smoothing updating outer values each inner iteration
688      3) color updating out values betwen colors.
689 */
690 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
691                            double fshift,int its,Vec xx)
692 {
693   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
694   Mat        AA = mat->A, BB = mat->B;
695   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
696   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
697   int        ierr,*idx, *diag;
698   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
699   Vec        tt;
700 
701   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
702   xs = x + shift; /* shift by one for index start of 1 */
703   ls = ls + shift;
704   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
705   diag = A->diag;
706   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
707     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
708   }
709   if (flag & SOR_EISENSTAT) {
710     /* Let  A = L + U + D; where L is lower trianglar,
711     U is upper triangular, E is diagonal; This routine applies
712 
713             (L + E)^{-1} A (U + E)^{-1}
714 
715     to a vector efficiently using Eisenstat's trick. This is for
716     the case of SSOR preconditioner, so E is D/omega where omega
717     is the relaxation factor.
718     */
719     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
720     PLogObjectParent(matin,tt);
721     VecGetArray(tt,&t);
722     scale = (2.0/omega) - 1.0;
723     /*  x = (E + U)^{-1} b */
724     VecSet(&zero,mat->lvec);
725     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
726                               mat->Mvctx); CHKERRQ(ierr);
727     for ( i=m-1; i>-1; i-- ) {
728       n    = A->i[i+1] - diag[i] - 1;
729       idx  = A->j + diag[i] + !shift;
730       v    = A->a + diag[i] + !shift;
731       sum  = b[i];
732       SPARSEDENSEMDOT(sum,xs,v,idx,n);
733       d    = fshift + A->a[diag[i]+shift];
734       n    = B->i[i+1] - B->i[i];
735       idx  = B->j + B->i[i] + shift;
736       v    = B->a + B->i[i] + shift;
737       SPARSEDENSEMDOT(sum,ls,v,idx,n);
738       x[i] = omega*(sum/d);
739     }
740     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
741                             mat->Mvctx); CHKERRQ(ierr);
742 
743     /*  t = b - (2*E - D)x */
744     v = A->a;
745     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
746 
747     /*  t = (E + L)^{-1}t */
748     ts = t + shift; /* shifted by one for index start of a or mat->j*/
749     diag = A->diag;
750     VecSet(&zero,mat->lvec);
751     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
752                                                  mat->Mvctx); CHKERRQ(ierr);
753     for ( i=0; i<m; i++ ) {
754       n    = diag[i] - A->i[i];
755       idx  = A->j + A->i[i] + shift;
756       v    = A->a + A->i[i] + shift;
757       sum  = t[i];
758       SPARSEDENSEMDOT(sum,ts,v,idx,n);
759       d    = fshift + A->a[diag[i]+shift];
760       n    = B->i[i+1] - B->i[i];
761       idx  = B->j + B->i[i] + shift;
762       v    = B->a + B->i[i] + shift;
763       SPARSEDENSEMDOT(sum,ls,v,idx,n);
764       t[i] = omega*(sum/d);
765     }
766     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
767                                                     mat->Mvctx); CHKERRQ(ierr);
768     /*  x = x + t */
769     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
770     VecDestroy(tt);
771     return 0;
772   }
773 
774 
775   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
776     if (flag & SOR_ZERO_INITIAL_GUESS) {
777       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
778     }
779     else {
780       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
781       CHKERRQ(ierr);
782       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
783       CHKERRQ(ierr);
784     }
785     while (its--) {
786       /* go down through the rows */
787       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
788                               mat->Mvctx); CHKERRQ(ierr);
789       for ( i=0; i<m; i++ ) {
790         n    = A->i[i+1] - A->i[i];
791         idx  = A->j + A->i[i] + shift;
792         v    = A->a + A->i[i] + shift;
793         sum  = b[i];
794         SPARSEDENSEMDOT(sum,xs,v,idx,n);
795         d    = fshift + A->a[diag[i]+shift];
796         n    = B->i[i+1] - B->i[i];
797         idx  = B->j + B->i[i] + shift;
798         v    = B->a + B->i[i] + shift;
799         SPARSEDENSEMDOT(sum,ls,v,idx,n);
800         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
801       }
802       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
803                             mat->Mvctx); CHKERRQ(ierr);
804       /* come up through the rows */
805       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
806                               mat->Mvctx); CHKERRQ(ierr);
807       for ( i=m-1; i>-1; i-- ) {
808         n    = A->i[i+1] - A->i[i];
809         idx  = A->j + A->i[i] + shift;
810         v    = A->a + A->i[i] + shift;
811         sum  = b[i];
812         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813         d    = fshift + A->a[diag[i]+shift];
814         n    = B->i[i+1] - B->i[i];
815         idx  = B->j + B->i[i] + shift;
816         v    = B->a + B->i[i] + shift;
817         SPARSEDENSEMDOT(sum,ls,v,idx,n);
818         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
819       }
820       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
821                             mat->Mvctx); CHKERRQ(ierr);
822     }
823   }
824   else if (flag & SOR_FORWARD_SWEEP){
825     if (flag & SOR_ZERO_INITIAL_GUESS) {
826       VecSet(&zero,mat->lvec);
827       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
828                               mat->Mvctx); CHKERRQ(ierr);
829       for ( i=0; i<m; i++ ) {
830         n    = diag[i] - A->i[i];
831         idx  = A->j + A->i[i] + shift;
832         v    = A->a + A->i[i] + shift;
833         sum  = b[i];
834         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835         d    = fshift + A->a[diag[i]+shift];
836         n    = B->i[i+1] - B->i[i];
837         idx  = B->j + B->i[i] + shift;
838         v    = B->a + B->i[i] + shift;
839         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840         x[i] = omega*(sum/d);
841       }
842       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
843                             mat->Mvctx); CHKERRQ(ierr);
844       its--;
845     }
846     while (its--) {
847       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
848       CHKERRQ(ierr);
849       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
850       CHKERRQ(ierr);
851       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
852                               mat->Mvctx); CHKERRQ(ierr);
853       for ( i=0; i<m; i++ ) {
854         n    = A->i[i+1] - A->i[i];
855         idx  = A->j + A->i[i] + shift;
856         v    = A->a + A->i[i] + shift;
857         sum  = b[i];
858         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859         d    = fshift + A->a[diag[i]+shift];
860         n    = B->i[i+1] - B->i[i];
861         idx  = B->j + B->i[i] + shift;
862         v    = B->a + B->i[i] + shift;
863         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
865       }
866       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
867                             mat->Mvctx); CHKERRQ(ierr);
868     }
869   }
870   else if (flag & SOR_BACKWARD_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       VecSet(&zero,mat->lvec);
873       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
874                               mat->Mvctx); CHKERRQ(ierr);
875       for ( i=m-1; i>-1; i-- ) {
876         n    = A->i[i+1] - diag[i] - 1;
877         idx  = A->j + diag[i] + !shift;
878         v    = A->a + diag[i] + !shift;
879         sum  = b[i];
880         SPARSEDENSEMDOT(sum,xs,v,idx,n);
881         d    = fshift + A->a[diag[i]+shift];
882         n    = B->i[i+1] - B->i[i];
883         idx  = B->j + B->i[i] + shift;
884         v    = B->a + B->i[i] + shift;
885         SPARSEDENSEMDOT(sum,ls,v,idx,n);
886         x[i] = omega*(sum/d);
887       }
888       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
889                             mat->Mvctx); CHKERRQ(ierr);
890       its--;
891     }
892     while (its--) {
893       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
894                             mat->Mvctx); CHKERRQ(ierr);
895       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
896                             mat->Mvctx); CHKERRQ(ierr);
897       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
898                               mat->Mvctx); CHKERRQ(ierr);
899       for ( i=m-1; i>-1; i-- ) {
900         n    = A->i[i+1] - A->i[i];
901         idx  = A->j + A->i[i] + shift;
902         v    = A->a + A->i[i] + shift;
903         sum  = b[i];
904         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905         d    = fshift + A->a[diag[i]+shift];
906         n    = B->i[i+1] - B->i[i];
907         idx  = B->j + B->i[i] + shift;
908         v    = B->a + B->i[i] + shift;
909         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
911       }
912       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
913                             mat->Mvctx); CHKERRQ(ierr);
914     }
915   }
916   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
917     if (flag & SOR_ZERO_INITIAL_GUESS) {
918       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
919     }
920     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
921     CHKERRQ(ierr);
922     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
923     CHKERRQ(ierr);
924     while (its--) {
925       /* go down through the rows */
926       for ( i=0; i<m; i++ ) {
927         n    = A->i[i+1] - A->i[i];
928         idx  = A->j + A->i[i] + shift;
929         v    = A->a + A->i[i] + shift;
930         sum  = b[i];
931         SPARSEDENSEMDOT(sum,xs,v,idx,n);
932         d    = fshift + A->a[diag[i]+shift];
933         n    = B->i[i+1] - B->i[i];
934         idx  = B->j + B->i[i] + shift;
935         v    = B->a + B->i[i] + shift;
936         SPARSEDENSEMDOT(sum,ls,v,idx,n);
937         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
938       }
939       /* come up through the rows */
940       for ( i=m-1; i>-1; i-- ) {
941         n    = A->i[i+1] - A->i[i];
942         idx  = A->j + A->i[i] + shift;
943         v    = A->a + A->i[i] + shift;
944         sum  = b[i];
945         SPARSEDENSEMDOT(sum,xs,v,idx,n);
946         d    = fshift + A->a[diag[i]+shift];
947         n    = B->i[i+1] - B->i[i];
948         idx  = B->j + B->i[i] + shift;
949         v    = B->a + B->i[i] + shift;
950         SPARSEDENSEMDOT(sum,ls,v,idx,n);
951         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
952       }
953     }
954   }
955   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
956     if (flag & SOR_ZERO_INITIAL_GUESS) {
957       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
958     }
959     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
960     CHKERRQ(ierr);
961     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
962     CHKERRQ(ierr);
963     while (its--) {
964       for ( i=0; i<m; i++ ) {
965         n    = A->i[i+1] - A->i[i];
966         idx  = A->j + A->i[i] + shift;
967         v    = A->a + A->i[i] + shift;
968         sum  = b[i];
969         SPARSEDENSEMDOT(sum,xs,v,idx,n);
970         d    = fshift + A->a[diag[i]+shift];
971         n    = B->i[i+1] - B->i[i];
972         idx  = B->j + B->i[i] + shift;
973         v    = B->a + B->i[i] + shift;
974         SPARSEDENSEMDOT(sum,ls,v,idx,n);
975         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
976       }
977     }
978   }
979   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
980     if (flag & SOR_ZERO_INITIAL_GUESS) {
981       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
982     }
983     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
984                             mat->Mvctx); CHKERRQ(ierr);
985     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
986                             mat->Mvctx); CHKERRQ(ierr);
987     while (its--) {
988       for ( i=m-1; i>-1; i-- ) {
989         n    = A->i[i+1] - A->i[i];
990         idx  = A->j + A->i[i] + shift;
991         v    = A->a + A->i[i] + shift;
992         sum  = b[i];
993         SPARSEDENSEMDOT(sum,xs,v,idx,n);
994         d    = fshift + A->a[diag[i]+shift];
995         n    = B->i[i+1] - B->i[i];
996         idx  = B->j + B->i[i] + shift;
997         v    = B->a + B->i[i] + shift;
998         SPARSEDENSEMDOT(sum,ls,v,idx,n);
999         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1000       }
1001     }
1002   }
1003   return 0;
1004 }
1005 
1006 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1007                              int *nzalloc,int *mem)
1008 {
1009   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1010   Mat        A = mat->A, B = mat->B;
1011   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1012 
1013   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1014   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1015   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1016   if (flag == MAT_LOCAL) {
1017     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1018   } else if (flag == MAT_GLOBAL_MAX) {
1019     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1020     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1021   } else if (flag == MAT_GLOBAL_SUM) {
1022     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1023     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1024   }
1025   return 0;
1026 }
1027 
1028 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1029 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1030 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1031 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1032 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1033 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1034 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1035 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1036 
1037 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1038 {
1039   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1040 
1041   if (op == NO_NEW_NONZERO_LOCATIONS ||
1042       op == YES_NEW_NONZERO_LOCATIONS ||
1043       op == COLUMNS_SORTED ||
1044       op == ROW_ORIENTED) {
1045         MatSetOption(a->A,op);
1046         MatSetOption(a->B,op);
1047   }
1048   else if (op == ROWS_SORTED ||
1049            op == SYMMETRIC_MATRIX ||
1050            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1051            op == YES_NEW_DIAGONALS)
1052     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1053   else if (op == COLUMN_ORIENTED) {
1054     a->roworiented = 0;
1055     MatSetOption(a->A,op);
1056     MatSetOption(a->B,op);
1057   }
1058   else if (op == NO_NEW_DIAGONALS)
1059     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1060   else
1061     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1062   return 0;
1063 }
1064 
1065 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1066 {
1067   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1068   *m = mat->M; *n = mat->N;
1069   return 0;
1070 }
1071 
1072 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1073 {
1074   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1075   *m = mat->m; *n = mat->N;
1076   return 0;
1077 }
1078 
1079 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1080 {
1081   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1082   *m = mat->rstart; *n = mat->rend;
1083   return 0;
1084 }
1085 
1086 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1087 {
1088   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1089   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1090   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1091   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1092 
1093   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1094   lrow = row - rstart;
1095 
1096   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1097   if (!v)   {pvA = 0; pvB = 0;}
1098   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1099   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1100   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1101   nztot = nzA + nzB;
1102 
1103   if (v  || idx) {
1104     if (nztot) {
1105       /* Sort by increasing column numbers, assuming A and B already sorted */
1106       int imark;
1107       for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1108       if (v) {
1109         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1110         for ( i=0; i<nzB; i++ ) {
1111           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1112           else break;
1113         }
1114         imark = i;
1115         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1116         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1117       }
1118       if (idx) {
1119         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1120         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1121         for ( i=0; i<nzB; i++ ) {
1122           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1123           else break;
1124         }
1125         imark = i;
1126         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1127         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1128       }
1129     }
1130     else {*idx = 0; *v=0;}
1131   }
1132   *nz = nztot;
1133   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1134   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1135   return 0;
1136 }
1137 
1138 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1139 {
1140   if (idx) PetscFree(*idx);
1141   if (v) PetscFree(*v);
1142   return 0;
1143 }
1144 
1145 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1146 {
1147   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1148   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1149   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1150   double     sum = 0.0;
1151   Scalar     *v;
1152 
1153   if (aij->size == 1) {
1154     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1155   } else {
1156     if (type == NORM_FROBENIUS) {
1157       v = amat->a;
1158       for (i=0; i<amat->nz; i++ ) {
1159 #if defined(PETSC_COMPLEX)
1160         sum += real(conj(*v)*(*v)); v++;
1161 #else
1162         sum += (*v)*(*v); v++;
1163 #endif
1164       }
1165       v = bmat->a;
1166       for (i=0; i<bmat->nz; i++ ) {
1167 #if defined(PETSC_COMPLEX)
1168         sum += real(conj(*v)*(*v)); v++;
1169 #else
1170         sum += (*v)*(*v); v++;
1171 #endif
1172       }
1173       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1174       *norm = sqrt(*norm);
1175     }
1176     else if (type == NORM_1) { /* max column norm */
1177       double *tmp, *tmp2;
1178       int    *jj, *garray = aij->garray;
1179       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1180       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1181       PetscMemzero(tmp,aij->N*sizeof(double));
1182       *norm = 0.0;
1183       v = amat->a; jj = amat->j;
1184       for ( j=0; j<amat->nz; j++ ) {
1185         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1186       }
1187       v = bmat->a; jj = bmat->j;
1188       for ( j=0; j<bmat->nz; j++ ) {
1189         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1190       }
1191       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1192       for ( j=0; j<aij->N; j++ ) {
1193         if (tmp2[j] > *norm) *norm = tmp2[j];
1194       }
1195       PetscFree(tmp); PetscFree(tmp2);
1196     }
1197     else if (type == NORM_INFINITY) { /* max row norm */
1198       double ntemp = 0.0;
1199       for ( j=0; j<amat->m; j++ ) {
1200         v = amat->a + amat->i[j] + shift;
1201         sum = 0.0;
1202         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1203           sum += PetscAbsScalar(*v); v++;
1204         }
1205         v = bmat->a + bmat->i[j] + shift;
1206         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1207           sum += PetscAbsScalar(*v); v++;
1208         }
1209         if (sum > ntemp) ntemp = sum;
1210       }
1211       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1212     }
1213     else {
1214       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1215     }
1216   }
1217   return 0;
1218 }
1219 
1220 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1221 {
1222   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1223   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1224   int        ierr,shift = Aloc->indexshift;
1225   Mat        B;
1226   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1227   Scalar     *array;
1228 
1229   if (matout == PETSC_NULL && M != N)
1230     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1231   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1232          PETSC_NULL,&B); CHKERRQ(ierr);
1233 
1234   /* copy over the A part */
1235   Aloc = (Mat_SeqAIJ*) a->A->data;
1236   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1237   row = a->rstart;
1238   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1239   for ( i=0; i<m; i++ ) {
1240     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1241     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1242   }
1243   aj = Aloc->j;
1244   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1245 
1246   /* copy over the B part */
1247   Aloc = (Mat_SeqAIJ*) a->B->data;
1248   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249   row = a->rstart;
1250   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1251   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1252   for ( i=0; i<m; i++ ) {
1253     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1254     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1255   }
1256   PetscFree(ct);
1257   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1258   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1259   if (matout != PETSC_NULL) {
1260     *matout = B;
1261   } else {
1262     /* This isn't really an in-place transpose .... but free data structures from a */
1263     PetscFree(a->rowners);
1264     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1265     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1266     if (a->colmap) PetscFree(a->colmap);
1267     if (a->garray) PetscFree(a->garray);
1268     if (a->lvec) VecDestroy(a->lvec);
1269     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1270     PetscFree(a);
1271     PetscMemcpy(A,B,sizeof(struct _Mat));
1272     PetscHeaderDestroy(B);
1273   }
1274   return 0;
1275 }
1276 
1277 extern int MatPrintHelp_SeqAIJ(Mat);
1278 static int MatPrintHelp_MPIAIJ(Mat A)
1279 {
1280   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281 
1282   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1283   else return 0;
1284 }
1285 
1286 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1287 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1288 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1289 /* -------------------------------------------------------------------*/
1290 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1291        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1292        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1293        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1294        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1295        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1296        MatLUFactor_MPIAIJ,0,
1297        MatRelax_MPIAIJ,
1298        MatTranspose_MPIAIJ,
1299        MatGetInfo_MPIAIJ,0,
1300        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1301        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1302        0,
1303        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1304        MatGetReordering_MPIAIJ,
1305        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1306        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1307        MatILUFactorSymbolic_MPIAIJ,0,
1308        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1309        0,0,0,
1310        0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1311        MatPrintHelp_MPIAIJ,
1312        MatScale_MPIAIJ};
1313 
1314 /*@C
1315    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1316    (the default parallel PETSc format). For good performance you should
1317    preallocate the matrix storage by setting the parameters d_nz (or d_nzz)
1318    and o_nz (or o_nzz). By setting these parameters accurately, performance
1319    can be increased by more than a factor of 50.
1320 
1321    Input Parameters:
1322 .  comm - MPI communicator
1323 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1324 .  n - number of local columns (or PETSC_DECIDE to have calculated
1325            if N is given)
1326 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1327 .  N - number of global columns (or PETSC_DECIDE to have calculated
1328            if n is given)
1329 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1330            (same for all local rows)
1331 .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1332            or null (possibly different for each row).  You must leave room
1333            for the diagonal entry even if it is zero.
1334 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1335            submatrix (same for all local rows).
1336 .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1337            submatrix or null (possibly different for each row).
1338 
1339    Output Parameter:
1340 .  newmat - the matrix
1341 
1342    Notes:
1343    The AIJ format (also called the Yale sparse matrix format or
1344    compressed row storage), is fully compatible with standard Fortran 77
1345    storage.  That is, the stored row and column indices can begin at
1346    either one (as in Fortran) or zero.  See the users manual for details.
1347 
1348    The user MUST specify either the local or global matrix dimensions
1349    (possibly both).
1350 
1351    By default, this format uses inodes (identical nodes) when possible.
1352    We search for consecutive rows with the same nonzero structure, thereby
1353    reusing matrix information to achieve increased efficiency.
1354 
1355    Options Database Keys:
1356 $    -mat_aij_no_inode  - Do not use inodes
1357 $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1358 
1359    Storage Information:
1360    For a square global matrix we define each processor's diagonal portion
1361    to be its local rows and the corresponding columns (a square submatrix);
1362    each processor's off-diagonal portion encompasses the remainder of the
1363    local matrix (a rectangular submatrix).
1364 
1365    The user can specify preallocated storage for the diagonal part of
1366    the local submatrix with either d_nz or d_nnz (not both).  Set
1367    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1368    memory allocation.  Likewise, specify preallocated storage for the
1369    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1370 
1371    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1372    the figure below we depict these three local rows and all columns (0-11).
1373 
1374 $          0 1 2 3 4 5 6 7 8 9 10 11
1375 $         -------------------
1376 $  row 3  |  o o o d d d o o o o o o
1377 $  row 4  |  o o o d d d o o o o o o
1378 $  row 5  |  o o o d d d o o o o o o
1379 $         -------------------
1380 $
1381 
1382    Thus, any entries in the d locations are stored in the d (diagonal)
1383    submatrix, and any entries in the o locations are stored in the
1384    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1385    stored simply in the MATSEQAIJ format for compressed row storage.
1386 
1387    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1388    and o_nz should indicate the number of nonzeros per row in the o matrix.
1389    In general, for PDE problems in which most nonzeros are near the diagonal,
1390    one expects d_nz >> o_nz.  See the users manual chapter on matrices for
1391    more details.
1392 
1393 .keywords: matrix, aij, compressed row, sparse, parallel
1394 
1395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1396 @*/
1397 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1398                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
1399 {
1400   Mat          mat;
1401   Mat_MPIAIJ   *a;
1402   int          ierr, i,sum[2],work[2];
1403 
1404   *newmat         = 0;
1405   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1406   PLogObjectCreate(mat);
1407   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1408   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1409   mat->destroy    = MatDestroy_MPIAIJ;
1410   mat->view       = MatView_MPIAIJ;
1411   mat->factor     = 0;
1412   mat->assembled  = PETSC_FALSE;
1413 
1414   a->insertmode = NOT_SET_VALUES;
1415   MPI_Comm_rank(comm,&a->rank);
1416   MPI_Comm_size(comm,&a->size);
1417 
1418   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1419     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1420 
1421   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1422     work[0] = m; work[1] = n;
1423     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1424     if (M == PETSC_DECIDE) M = sum[0];
1425     if (N == PETSC_DECIDE) N = sum[1];
1426   }
1427   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1428   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1429   a->m = m;
1430   a->n = n;
1431   a->N = N;
1432   a->M = M;
1433 
1434   /* build local table of row and column ownerships */
1435   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1436   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1437   a->cowners = a->rowners + a->size + 1;
1438   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1439   a->rowners[0] = 0;
1440   for ( i=2; i<=a->size; i++ ) {
1441     a->rowners[i] += a->rowners[i-1];
1442   }
1443   a->rstart = a->rowners[a->rank];
1444   a->rend   = a->rowners[a->rank+1];
1445   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1446   a->cowners[0] = 0;
1447   for ( i=2; i<=a->size; i++ ) {
1448     a->cowners[i] += a->cowners[i-1];
1449   }
1450   a->cstart = a->cowners[a->rank];
1451   a->cend   = a->cowners[a->rank+1];
1452 
1453   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1454   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1455   PLogObjectParent(mat,a->A);
1456   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1457   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1458   PLogObjectParent(mat,a->B);
1459 
1460   /* build cache for off array entries formed */
1461   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1462   a->colmap      = 0;
1463   a->garray      = 0;
1464   a->roworiented = 1;
1465 
1466   /* stuff used for matrix vector multiply */
1467   a->lvec      = 0;
1468   a->Mvctx     = 0;
1469 
1470   *newmat = mat;
1471   return 0;
1472 }
1473 
1474 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1475 {
1476   Mat        mat;
1477   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1478   int        ierr, len,flg;
1479 
1480   *newmat       = 0;
1481   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1482   PLogObjectCreate(mat);
1483   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1484   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1485   mat->destroy    = MatDestroy_MPIAIJ;
1486   mat->view       = MatView_MPIAIJ;
1487   mat->factor     = matin->factor;
1488   mat->assembled  = PETSC_TRUE;
1489 
1490   a->m          = oldmat->m;
1491   a->n          = oldmat->n;
1492   a->M          = oldmat->M;
1493   a->N          = oldmat->N;
1494 
1495   a->rstart     = oldmat->rstart;
1496   a->rend       = oldmat->rend;
1497   a->cstart     = oldmat->cstart;
1498   a->cend       = oldmat->cend;
1499   a->size       = oldmat->size;
1500   a->rank       = oldmat->rank;
1501   a->insertmode = NOT_SET_VALUES;
1502 
1503   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1504   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1505   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1506   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1507   if (oldmat->colmap) {
1508     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1509     PLogObjectMemory(mat,(a->N)*sizeof(int));
1510     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1511   } else a->colmap = 0;
1512   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1513     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1514     PLogObjectMemory(mat,len*sizeof(int));
1515     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1516   } else a->garray = 0;
1517 
1518   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1519   PLogObjectParent(mat,a->lvec);
1520   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1521   PLogObjectParent(mat,a->Mvctx);
1522   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1523   PLogObjectParent(mat,a->A);
1524   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1525   PLogObjectParent(mat,a->B);
1526   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1527   if (flg) {
1528     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1529   }
1530   *newmat = mat;
1531   return 0;
1532 }
1533 
1534 #include "sysio.h"
1535 
1536 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
1537 {
1538   Mat          A;
1539   int          i, nz, ierr, j,rstart, rend, fd;
1540   Scalar       *vals,*svals;
1541   PetscObject  vobj = (PetscObject) bview;
1542   MPI_Comm     comm = vobj->comm;
1543   MPI_Status   status;
1544   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1545   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1546   int          tag = ((PetscObject)bview)->tag;
1547 
1548   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1549   if (!rank) {
1550     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1551     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1552     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1553   }
1554 
1555   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1556   M = header[1]; N = header[2];
1557   /* determine ownership of all rows */
1558   m = M/size + ((M % size) > rank);
1559   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1560   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1561   rowners[0] = 0;
1562   for ( i=2; i<=size; i++ ) {
1563     rowners[i] += rowners[i-1];
1564   }
1565   rstart = rowners[rank];
1566   rend   = rowners[rank+1];
1567 
1568   /* distribute row lengths to all processors */
1569   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1570   offlens = ourlens + (rend-rstart);
1571   if (!rank) {
1572     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1573     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1574     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1575     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1576     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1577     PetscFree(sndcounts);
1578   }
1579   else {
1580     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1581   }
1582 
1583   if (!rank) {
1584     /* calculate the number of nonzeros on each processor */
1585     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1586     PetscMemzero(procsnz,size*sizeof(int));
1587     for ( i=0; i<size; i++ ) {
1588       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1589         procsnz[i] += rowlengths[j];
1590       }
1591     }
1592     PetscFree(rowlengths);
1593 
1594     /* determine max buffer needed and allocate it */
1595     maxnz = 0;
1596     for ( i=0; i<size; i++ ) {
1597       maxnz = PetscMax(maxnz,procsnz[i]);
1598     }
1599     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1600 
1601     /* read in my part of the matrix column indices  */
1602     nz = procsnz[0];
1603     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1604     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1605 
1606     /* read in every one elses and ship off */
1607     for ( i=1; i<size; i++ ) {
1608       nz = procsnz[i];
1609       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1610       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1611     }
1612     PetscFree(cols);
1613   }
1614   else {
1615     /* determine buffer space needed for message */
1616     nz = 0;
1617     for ( i=0; i<m; i++ ) {
1618       nz += ourlens[i];
1619     }
1620     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1621 
1622     /* receive message of column indices*/
1623     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1624     MPI_Get_count(&status,MPI_INT,&maxnz);
1625     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1626   }
1627 
1628   /* loop over local rows, determining number of off diagonal entries */
1629   PetscMemzero(offlens,m*sizeof(int));
1630   jj = 0;
1631   for ( i=0; i<m; i++ ) {
1632     for ( j=0; j<ourlens[i]; j++ ) {
1633       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1634       jj++;
1635     }
1636   }
1637 
1638   /* create our matrix */
1639   for ( i=0; i<m; i++ ) {
1640     ourlens[i] -= offlens[i];
1641   }
1642   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1643   A = *newmat;
1644   MatSetOption(A,COLUMNS_SORTED);
1645   for ( i=0; i<m; i++ ) {
1646     ourlens[i] += offlens[i];
1647   }
1648 
1649   if (!rank) {
1650     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1651 
1652     /* read in my part of the matrix numerical values  */
1653     nz = procsnz[0];
1654     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1655 
1656     /* insert into matrix */
1657     jj      = rstart;
1658     smycols = mycols;
1659     svals   = vals;
1660     for ( i=0; i<m; i++ ) {
1661       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1662       smycols += ourlens[i];
1663       svals   += ourlens[i];
1664       jj++;
1665     }
1666 
1667     /* read in other processors and ship out */
1668     for ( i=1; i<size; i++ ) {
1669       nz = procsnz[i];
1670       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1671       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1672     }
1673     PetscFree(procsnz);
1674   }
1675   else {
1676     /* receive numeric values */
1677     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1678 
1679     /* receive message of values*/
1680     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1681     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1682     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1683 
1684     /* insert into matrix */
1685     jj      = rstart;
1686     smycols = mycols;
1687     svals   = vals;
1688     for ( i=0; i<m; i++ ) {
1689       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1690       smycols += ourlens[i];
1691       svals   += ourlens[i];
1692       jj++;
1693     }
1694   }
1695   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1696 
1697   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1698   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1699   return 0;
1700 }
1701