/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cd2245dea71a3aa4fce336c3d5cb6692bd00b1f7) */
#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.191 1997/02/22 02:25:15 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local column number in the off-diagonal part of the
   local storage of the matrix. This is done in a non-scalable way since
   the length of colmap equals the number of global columns.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}
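
/*
   Lookup sketch: for a global column gcol,

       loc = aij->colmap[gcol] - 1;

   gives the local column number in the off-diagonal part, and loc < 0
   means gcol is absent from it (the map was zeroed above and entries are
   stored as local index + 1, leaving 0 as the "not present" marker).
*/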

extern int DisAssemble_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#define CHUNKSIZE   15

/* Insert (or add) a single value at (row,col) of the local diagonal block A,
   keeping each row sorted by column; if the row is full and new nonzeros are
   allowed, the row storage is enlarged by CHUNKSIZE entries. */
#define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = imax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
      for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value; \
          else                    ap[_i] = value; \
          goto _noinsert; \
        } \
      } \
      if (nonew) goto _noinsert; \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar)); \
        /* free up old matrix storage */ \
 \
        PetscFree(a->a); \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = imax[row] = imax[row] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1; \
      ap[_i] = value; \
      _noinsert: ; \
      ailen[row] = nrow; \
}
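
/*
   Insertion sketch (illustrative numbers): with shift == 0 and a row
   currently holding columns rp = {1,5,9} with values ap = {a,b,c},
   inserting v at column 5 under ADD_VALUES gives ap = {a,b+v,c}; inserting
   v at column 7 shifts the tail up, giving rp = {1,5,7,9} and
   ap = {a,b,v,c}, after first growing the row by CHUNKSIZE if it was full.
*/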

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *rp,ii,nrow,_i,rmax, N, col1;
  int        *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  int        *aj = a->j, nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap, *aa = a->a;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row index, global column index, value); the indices are
     stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
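
/*
   Wire format for the exchange above (sketch): each stashed entry travels
   as a triple of Scalars,

       svalues = [ row_0, col_0, val_0, row_1, col_1, val_1, ... ]

   so a message carrying k entries has length 3*k in units of MPIU_SCALAR,
   and the receiver recovers k as MPI_Get_count(...)/3 in MatAssemblyEnd.
*/
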
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}
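
/*
   The product above is computed as

       y = A_d * x_local + B_o * x_ghost,

   where A_d is the diagonal block, B_o the off-diagonal block, and x_ghost
   (a->lvec) holds the needed off-process entries of x; the scatter filling
   x_ghost is started before the local A_d*x so that communication overlaps
   the local computation.
*/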

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which holds for this implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which holds for this implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock a->A is
  the diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N) {
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  }
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*) aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->n;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}
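
/*
   Merge sketch (illustrative numbers): suppose this process owns global
   columns 4..7 (cstart = 4), the diagonal part of the row has local
   columns {0,2} (global 4 and 6), and the off-diagonal part maps through
   cmap/garray to global columns {1,9}.  The loops above interleave the
   three groups so the returned row is sorted by global column: 1, 4, 6, 9.
*/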

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Square matrix only for in-place");
  }
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
static int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};

1381 #undef __FUNC__
1382 #define __FUNC__ "MatCreateMPIAIJ"
1383 /*@C
1384    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1385    (the default parallel PETSc format).  For good matrix assembly performance
1386    the user should preallocate the matrix storage by setting the parameters
1387    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1388    performance can be increased by more than a factor of 50.
1389 
1390    Input Parameters:
1391 .  comm - MPI communicator
1392 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1393            This value should be the same as the local size used in creating the
1394            y vector for the matrix-vector product y = Ax.
1395 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1396            This value should be the same as the local size used in creating the
1397            x vector for the matrix-vector product y = Ax.
1398 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1399 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1400 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1401            (same for all local rows)
1402 .  d_nzz - array containing the number of nonzeros in the various rows of the
1403            diagonal portion of the local submatrix (possibly different for each row)
1404            or PETSC_NULL. You must leave room for the diagonal entry even if
1405            it is zero.
1406 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1407            submatrix (same for all local rows).
1408 .  o_nzz - array containing the number of nonzeros in the various rows of the
1409            off-diagonal portion of the local submatrix (possibly different for
1410            each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$          0 1 2 3 4 5 6 7 8 9 10 11
$         --------------------------
$  row 3  |  o o o d d d o o o o o o
$  row 4  |  o o o d d d o o o o o o
$  row 5  |  o o o d d d o o o o o o
$         --------------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
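
   Example:
   The call below is only a sketch; the estimates of 5 nonzeros per row in
   the diagonal block and 2 per row in the off-diagonal block are
   illustrative values, not recommendations:

$      Mat A;
$      int ierr, M = 100, N = 100;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);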

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
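  /*
     Note: this default split gives each of the first M % size processes one
     extra row; e.g., M = 10 on 3 processes yields local sizes 4, 3 and 3.
  */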
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

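  /*
     rowners[] and cowners[] hold the ownership ranges in prefix-sum form:
     process i owns rows rowners[i] through rowners[i+1]-1.  For example,
     local sizes 2, 3 and 2 on three processes give rowners = {0,2,5,7}.
     The running sums below start at i=2 because rowners[0] = 0 already
     leaves rowners[1] correct after the allgather.
  */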
  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);
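  /*
     Note that the off-diagonal block b->B is created with the full global
     column count N; during assembly its columns are compacted to the
     nonzero ones, which are recorded in b->garray.
  */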

  /* build cache (stash) for matrix entries destined for other processors */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
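
/*
   Assembly sketch (illustrative only; ncols, cols and values stand for
   whatever the application computes for each of its local rows):

      int rstart, rend, i, ierr;
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ierr = MatSetValues(A,1,&i,ncols,cols,values,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/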

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
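  /*
     Duplicate a parallel AIJ matrix: the ownership tables, the colmap and
     garray tables, the sequential blocks A and B, and the scatter context
     for matrix-vector products are all copied into the new matrix.
  */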
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
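/*
   Binary file layout assumed below (as produced by MatView() with a binary
   viewer): an integer header {MAT_COOKIE, M, N, total number of nonzeros},
   followed by the M row lengths, then all column indices, then all
   numerical values, each stored contiguously by row.
*/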
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners[] holds prefix sums, so it doubles as the displacement array */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off to the owning processor */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"message length does not match row lengths in file");
  }

  /* loop over local rows, counting off-diagonal entries (columns outside [rstart,rend)) */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }
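  /* e.g., with rstart = 3 and rend = 6, column 4 falls in the diagonal
     block while column 8 is counted in offlens[] */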

  /* create our matrix: ourlens[] temporarily holds only the diagonal-block
     row lengths for preallocation, and is restored to the full row lengths
     below for the insertion loops */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"message length does not match row lengths in file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
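
/*
   Usage sketch for MatLoad() (illustrative only; "matrix.dat" is a
   placeholder file name):

      Viewer viewer;
      Mat    A;
      ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                  &viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/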