xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c2653b3d308917c4b5698e5f59fd76add26d7f41)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.192 1997/03/09 17:58:24 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global number of columns.
13 */
14 #undef __FUNC__
15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 
22   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23   PLogObjectMemory(mat,aij->N*sizeof(int));
24   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
26   return 0;
27 }
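
/*
   A hedged sketch (not part of the library) of how the colmap built above is
   consulted elsewhere in this file: colmap[] stores local index + 1, so a
   stored value of 0 flags a global column with no local counterpart.
   The variable names below are illustrative only.

     int gcol = 17;                       // some global column number
     int lcol = aij->colmap[gcol] - 1;    // local column in aij->B, or < 0
     if (lcol < 0) {
       // global column gcol has no entry in the off-diagonal block
     }
*/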
28 
29 extern int DisAssemble_MPIAIJ(Mat);
30 
31 #undef __FUNC__
32 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
33 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
34                            PetscTruth *done)
35 {
36   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37   int        ierr;
38   if (aij->size == 1) {
39     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40   } else SETERRQ(1,0,"not supported in parallel");
41   return 0;
42 }
43 
44 #undef __FUNC__
45 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
46 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
47                                PetscTruth *done)
48 {
49   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
50   int        ierr;
51   if (aij->size == 1) {
52     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53   } else SETERRQ(1,0,"not supported in parallel");
54   return 0;
55 }
56 
57 #define CHUNKSIZE   15
58 #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
59 { \
60  \
61     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
62     rmax = imax[row]; nrow = ailen[row];  \
63     col1 = col - shift; \
64      \
65       for ( _i=0; _i<nrow; _i++ ) { \
66         if (rp[_i] > col1) break; \
67         if (rp[_i] == col1) { \
68           if (addv == ADD_VALUES) ap[_i] += value;   \
69           else                  ap[_i] = value; \
70           goto _noinsert; \
71         } \
72       }  \
73       if (nonew)  goto _noinsert; \
74       if (nrow >= rmax) { \
75         /* there is no extra room in row, therefore enlarge */ \
76         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
77         Scalar *new_a; \
78  \
79         /* malloc new storage space */ \
80         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
81         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
82         new_j   = (int *) (new_a + new_nz); \
83         new_i   = new_j + new_nz; \
84  \
85         /* copy over old data into new slots */ \
86         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
87         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
88         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
89         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
90         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
91                                                            len*sizeof(int)); \
92         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
93         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
94                                                            len*sizeof(Scalar));  \
95         /* free up old matrix storage */ \
96  \
97         PetscFree(a->a);  \
98         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
99         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
100         a->singlemalloc = 1; \
101  \
102         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
103         rmax = imax[row] = imax[row] + CHUNKSIZE; \
104         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
105         a->maxnz += CHUNKSIZE; \
106         a->reallocs++; \
107       } \
108       N = nrow++ - 1; a->nz++; \
109       /* shift up all the later entries in this row */ \
110       for ( ii=N; ii>=_i; ii-- ) { \
111         rp[ii+1] = rp[ii]; \
112         ap[ii+1] = ap[ii]; \
113       } \
114       rp[_i] = col1;  \
115       ap[_i] = value;  \
116       _noinsert: ; \
117       ailen[row] = nrow; \
118 }
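
/*
   The macro above is dense, so here is a simplified, hedged sketch (plain C,
   unused by the build) of the same insertion strategy for one CSR row: scan
   the sorted column list, overwrite or accumulate on a match, otherwise
   shift the tail up one slot and insert.  Growing the row by CHUNKSIZE when
   it is full is omitted here for brevity.

     static int InsertInSortedRow(int *cols,Scalar *vals,int *nrow,int rmax,
                                  int col,Scalar value,InsertMode addv)
     {
       int i,k;
       for ( i=0; i<*nrow; i++ ) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += value;
           else                    vals[i] = value;
           return 0;
         }
       }
       if (*nrow >= rmax) return 1;      // caller must first enlarge the row
       for ( k=*nrow-1; k>=i; k-- ) {    // shift later entries up one slot
         cols[k+1] = cols[k]; vals[k+1] = vals[k];
       }
       cols[i] = col; vals[i] = value; (*nrow)++;
       return 0;
     }
*/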
119 
120 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
121 #undef __FUNC__
122 #define __FUNC__ "MatSetValues_MPIAIJ"
123 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
124 {
125   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
126   Scalar     value;
127   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
128   int        cstart = aij->cstart, cend = aij->cend,row,col;
129   int        roworiented = aij->roworiented;
130 
131   /* Some variables required by the macro above */
132   Mat        A = aij->A;
133   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
134   int        *rp,ii,nrow,_i,rmax, N, col1;
135   int        *imax = a->imax, *ai = a->i, *ailen = a->ilen;
136   int        *aj = a->j, nonew = a->nonew,shift = a->indexshift;
137   Scalar     *ap, *aa = a->a;
138 
139   for ( i=0; i<m; i++ ) {
140 #if defined(PETSC_BOPT_g)
141     if (im[i] < 0) SETERRQ(1,0,"Negative row");
142     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
143 #endif
144     if (im[i] >= rstart && im[i] < rend) {
145       row = im[i] - rstart;
146       for ( j=0; j<n; j++ ) {
147         if (in[j] >= cstart && in[j] < cend){
148           col = in[j] - cstart;
149           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
150           MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
151           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
152         }
153 #if defined(PETSC_BOPT_g)
154         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
155         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
156 #endif
157         else {
158           if (mat->was_assembled) {
159             if (!aij->colmap) {
160               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
161             }
162             col = aij->colmap[in[j]] - 1;
163             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
164               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
165               col =  in[j];
166             }
167           }
168           else col = in[j];
169           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
170           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
171         }
172       }
173     }
174     else {
175       if (roworiented && !aij->donotstash) {
176         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
177       }
178       else {
179         if (!aij->donotstash) {
180           row = im[i];
181           for ( j=0; j<n; j++ ) {
182             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
183           }
184         }
185       }
186     }
187   }
188   return 0;
189 }
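
/*
   Hedged usage sketch for the routine above: callers always pass GLOBAL
   indices, and each entry is routed to aij->A (diagonal block), aij->B
   (off-diagonal block), or the stash when the row lives on another
   processor.  The values below are illustrative only.

     int    row = 3, col = 7;            // global indices
     Scalar v   = 5.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES); CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/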
190 
191 #undef __FUNC__
192 #define __FUNC__ "MatGetValues_MPIAIJ"
193 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
194 {
195   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
196   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197   int        cstart = aij->cstart, cend = aij->cend,row,col;
198 
199   for ( i=0; i<m; i++ ) {
200     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
201     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
202     if (idxm[i] >= rstart && idxm[i] < rend) {
203       row = idxm[i] - rstart;
204       for ( j=0; j<n; j++ ) {
205         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
206         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
207         if (idxn[j] >= cstart && idxn[j] < cend){
208           col = idxn[j] - cstart;
209           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
210         }
211         else {
212           if (!aij->colmap) {
213             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
214           }
215           col = aij->colmap[idxn[j]] - 1;
216           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
217           else {
218             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
219           }
220         }
221       }
222     }
223     else {
224       SETERRQ(1,0,"Only local values currently supported");
225     }
226   }
227   return 0;
228 }
229 
230 #undef __FUNC__
231 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
232 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
233 {
234   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
235   MPI_Comm    comm = mat->comm;
236   int         size = aij->size, *owners = aij->rowners;
237   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
238   MPI_Request *send_waits,*recv_waits;
239   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
240   InsertMode  addv;
241   Scalar      *rvalues,*svalues;
242 
243   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
244   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
245   if (addv == (ADD_VALUES|INSERT_VALUES)) {
246     SETERRQ(1,0,"Some processors inserted others added");
247   }
248   mat->insertmode = addv; /* in case this processor had no cache */
249 
250   /*  first count number of contributors to each processor */
251   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
252   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
253   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
254   for ( i=0; i<aij->stash.n; i++ ) {
255     idx = aij->stash.idx[i];
256     for ( j=0; j<size; j++ ) {
257       if (idx >= owners[j] && idx < owners[j+1]) {
258         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
259       }
260     }
261   }
262   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
263 
264   /* inform other processors of number of messages and max length*/
265   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
266   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
267   nreceives = work[rank];
268   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
269   nmax = work[rank];
270   PetscFree(work);
271 
272   /* post receives:
273        1) each message will consist of ordered triples
274      (global row, global column, value); we store the indices as Scalars
275      to simplify the message passing.
276        2) since we don't know how long each individual message is we
277      allocate the largest needed buffer for each receive. Potentially
278      this is a lot of wasted space.
279 
280 
281        This could be done better.
282   */
283   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
284   CHKPTRQ(rvalues);
285   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
286   CHKPTRQ(recv_waits);
287   for ( i=0; i<nreceives; i++ ) {
288     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
289               comm,recv_waits+i);
290   }
291 
292   /* do sends:
293       1) starts[i] gives the starting index in svalues for stuff going to
294          the ith processor
295   */
296   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
297   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
298   CHKPTRQ(send_waits);
299   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
300   starts[0] = 0;
301   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
302   for ( i=0; i<aij->stash.n; i++ ) {
303     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
304     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
305     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
306   }
307   PetscFree(owner);
308   starts[0] = 0;
309   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
310   count = 0;
311   for ( i=0; i<size; i++ ) {
312     if (procs[i]) {
313       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
314                 comm,send_waits+count++);
315     }
316   }
317   PetscFree(starts); PetscFree(nprocs);
318 
319   /* Free cache space */
320   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
321   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
322 
323   aij->svalues    = svalues;    aij->rvalues    = rvalues;
324   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
325   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
326   aij->rmax       = nmax;
327 
328   return 0;
329 }
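
/*
   Why the two Allreduces above suffice (hedged explanatory note): procs[]
   holds a 0/1 flag per destination, so a SUM reduction leaves in slot j the
   number of processes sending to j, and each process reads its own slot;
   the MAX reduction of nprocs[] bounds the longest message it can receive.
   A small worked example with 3 processes, where only p0 and p2 send to p1:

     procs on p0 = {0,1,0},  p1 = {0,0,0},  p2 = {0,1,0}
     SUM         = {0,2,0}   ->  p1 posts work[1] = 2 receives
*/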
330 extern int MatSetUpMultiply_MPIAIJ(Mat);
331 
332 #undef __FUNC__
333 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
334 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
335 {
336   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
337   MPI_Status  *send_status,recv_status;
338   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
339   int         row,col,other_disassembled;
340   Scalar      *values,val;
341   InsertMode  addv = mat->insertmode;
342 
343   /*  wait on receives */
344   while (count) {
345     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
346     /* unpack receives into our local space */
347     values = aij->rvalues + 3*imdex*aij->rmax;
348     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
349     n = n/3;
350     for ( i=0; i<n; i++ ) {
351       row = (int) PetscReal(values[3*i]) - aij->rstart;
352       col = (int) PetscReal(values[3*i+1]);
353       val = values[3*i+2];
354       if (col >= aij->cstart && col < aij->cend) {
355         col -= aij->cstart;
356         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
357       }
358       else {
359         if (mat->was_assembled || mat->assembled) {
360           if (!aij->colmap) {
361             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
362           }
363           col = aij->colmap[col] - 1;
364           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
365             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
366             col = (int) PetscReal(values[3*i+1]);
367           }
368         }
369         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
370       }
371     }
372     count--;
373   }
374   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
375 
376   /* wait on sends */
377   if (aij->nsends) {
378     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
379     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
380     PetscFree(send_status);
381   }
382   PetscFree(aij->send_waits); PetscFree(aij->svalues);
383 
384   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
385   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
386 
387   /* determine if any processor has disassembled; if so, we must
388      also disassemble ourselves so that we can reassemble. */
389   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
390   if (mat->was_assembled && !other_disassembled) {
391     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
396   }
397   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
399 
400   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
401   return 0;
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 static int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
411   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
412   return 0;
413 }
414 
415 /* the code does not handle the diagonal entries correctly unless the
416    matrix is square and the column and row ownerships are identical.
417    This is a BUG. The only way to fix it seems to be to access
418    aij->A and aij->B directly and not through the MatZeroRows()
419    routine.
420 */
421 #undef __FUNC__
422 #define __FUNC__ "MatZeroRows_MPIAIJ"
423 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
426   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
427   int            *procs,*nprocs,j,found,idx,nsends,*work;
428   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
429   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
430   int            *lens,imdex,*lrows,*values;
431   MPI_Comm       comm = A->comm;
432   MPI_Request    *send_waits,*recv_waits;
433   MPI_Status     recv_status,*send_status;
434   IS             istmp;
435 
436   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
437   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
438 
439   /*  first count number of contributors to each processor */
440   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
441   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
442   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
443   for ( i=0; i<N; i++ ) {
444     idx = rows[i];
445     found = 0;
446     for ( j=0; j<size; j++ ) {
447       if (idx >= owners[j] && idx < owners[j+1]) {
448         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
449       }
450     }
451     if (!found) SETERRQ(1,0,"Index out of range");
452   }
453   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
454 
455   /* inform other processors of number of messages and max length*/
456   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
457   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
458   nrecvs = work[rank];
459   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
460   nmax = work[rank];
461   PetscFree(work);
462 
463   /* post receives:   */
464   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
465   CHKPTRQ(rvalues);
466   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
467   CHKPTRQ(recv_waits);
468   for ( i=0; i<nrecvs; i++ ) {
469     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for stuff going to
474          the ith processor
475   */
476   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
477   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
478   CHKPTRQ(send_waits);
479   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
480   starts[0] = 0;
481   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
482   for ( i=0; i<N; i++ ) {
483     svalues[starts[owner[i]]++] = rows[i];
484   }
485   ISRestoreIndices(is,&rows);
486 
487   starts[0] = 0;
488   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
489   count = 0;
490   for ( i=0; i<size; i++ ) {
491     if (procs[i]) {
492       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
493     }
494   }
495   PetscFree(starts);
496 
497   base = owners[rank];
498 
499   /*  wait on receives */
500   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
501   source = lens + nrecvs;
502   count  = nrecvs; slen = 0;
503   while (count) {
504     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
505     /* unpack receives into our local space */
506     MPI_Get_count(&recv_status,MPI_INT,&n);
507     source[imdex]  = recv_status.MPI_SOURCE;
508     lens[imdex]  = n;
509     slen += n;
510     count--;
511   }
512   PetscFree(recv_waits);
513 
514   /* move the data into the send scatter */
515   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
516   count = 0;
517   for ( i=0; i<nrecvs; i++ ) {
518     values = rvalues + i*nmax;
519     for ( j=0; j<lens[i]; j++ ) {
520       lrows[count++] = values[j] - base;
521     }
522   }
523   PetscFree(rvalues); PetscFree(lens);
524   PetscFree(owner); PetscFree(nprocs);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PLogObjectParent(A,istmp);
529   PetscFree(lrows);
530   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
531   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
532   ierr = ISDestroy(istmp); CHKERRQ(ierr);
533 
534   /* wait on sends */
535   if (nsends) {
536     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
537     CHKPTRQ(send_status);
538     MPI_Waitall(nsends,send_waits,send_status);
539     PetscFree(send_status);
540   }
541   PetscFree(send_waits); PetscFree(svalues);
542 
543   return 0;
544 }
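
/*
   Hedged usage sketch for the routine above: rows are named by GLOBAL index
   and may be owned by any processor; diag, if non-null, is placed on the
   diagonal of each zeroed row.  The row numbers below are illustrative.

     IS     is;
     int    rows[2] = {3,5};             // global row numbers
     Scalar diag    = 1.0;
     ierr = ISCreateGeneral(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/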
545 
546 #undef __FUNC__
547 #define __FUNC__ "MatMult_MPIAIJ"
548 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
549 {
550   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
551   int        ierr,nt;
552 
553   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
554   if (nt != a->n) {
555     SETERRQ(1,0,"Incompatible partition of A and xx");
556   }
557   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
558   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
559   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
560   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
561   return 0;
562 }
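
/*
   The multiply above overlaps communication with computation; in matrix
   terms it computes

       y = A_d x_local + B_o x_ghost

   where A_d is the diagonal block a->A, B_o the off-diagonal block a->B,
   and x_ghost the ghost values gathered into a->lvec by the scatter.
   A hedged usage sketch (x must match the matrix's column layout, y its
   row layout):

     ierr = MatMult(A,x,y); CHKERRQ(ierr);
*/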
563 
564 #undef __FUNC__
565 #define __FUNC__ "MatMultAdd_MPIAIJ"
566 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
567 {
568   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
569   int        ierr;
570   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
572   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
573   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
574   return 0;
575 }
576 
577 #undef __FUNC__
578 #define __FUNC__ "MatMultTrans_MPIAIJ"
579 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
580 {
581   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
582   int        ierr;
583 
584   /* do nondiagonal part */
585   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
586   /* send it on its way */
587   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
588   /* do local part */
589   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
590   /* receive remote parts: note this assumes the values are not actually */
591   /* inserted in yy until the next line, which is true for this implementation */
592   /* but may not always hold. */
593   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
594   return 0;
595 }
596 
597 #undef __FUNC__
598 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
599 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
602   int        ierr;
603 
604   /* do nondiagonal part */
605   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
606   /* send it on its way */
607   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
608   /* do local part */
609   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
610   /* receive remote parts: note this assumes the values are not actually */
611   /* inserted in zz until the next line, which is true for this implementation */
612   /* but may not always hold. */
613   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
614   return 0;
615 }
616 
617 /*
618   This only works correctly for square matrices where the subblock A->A is the
619    diagonal block
620 */
621 #undef __FUNC__
622 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
623 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
624 {
625   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
626   if (a->M != a->N)
627     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
628   if (a->rstart != a->cstart || a->rend != a->cend) {
629     SETERRQ(1,0,"row partition must equal col partition");  }
630   return MatGetDiagonal(a->A,v);
631 }
632 
633 #undef __FUNC__
634 #define __FUNC__ "MatScale_MPIAIJ"
635 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
638   int        ierr;
639   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
640   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
641   return 0;
642 }
643 
644 #undef __FUNC__
645 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
646 static int MatDestroy_MPIAIJ(PetscObject obj)
647 {
648   Mat        mat = (Mat) obj;
649   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
650   int        ierr;
651 #if defined(PETSC_LOG)
652   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
653 #endif
654   PetscFree(aij->rowners);
655   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
656   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
657   if (aij->colmap) PetscFree(aij->colmap);
658   if (aij->garray) PetscFree(aij->garray);
659   if (aij->lvec)   VecDestroy(aij->lvec);
660   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
661   if (aij->rowvalues) PetscFree(aij->rowvalues);
662   PetscFree(aij);
663   if (mat->mapping) {
664     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
665   }
666   PLogObjectDestroy(mat);
667   PetscHeaderDestroy(mat);
668   return 0;
669 }
670 #include "draw.h"
671 #include "pinclude/pviewer.h"
672 
673 #undef __FUNC__
674 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
675 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
676 {
677   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
678   int         ierr;
679 
680   if (aij->size == 1) {
681     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
682   }
683   else SETERRQ(1,0,"Only uniprocessor output supported");
684   return 0;
685 }
686 
687 #undef __FUNC__
688 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
689 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
690 {
691   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
692   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
693   int         ierr, format,shift = C->indexshift,rank;
694   FILE        *fd;
695   ViewerType  vtype;
696 
697   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
698   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
699     ierr = ViewerGetFormat(viewer,&format);
700     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
701       MatInfo info;
702       int     flg;
703       MPI_Comm_rank(mat->comm,&rank);
704       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
705       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
706       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
707       PetscSequentialPhaseBegin(mat->comm,1);
708       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
709          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
710       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
711          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
712       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
713       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
714       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
715       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
716       fflush(fd);
717       PetscSequentialPhaseEnd(mat->comm,1);
718       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
719       return 0;
720     }
721     else if (format == VIEWER_FORMAT_ASCII_INFO) {
722       return 0;
723     }
724   }
725 
726   if (vtype == DRAW_VIEWER) {
727     Draw       draw;
728     PetscTruth isnull;
729     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
730     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
731   }
732 
733   if (vtype == ASCII_FILE_VIEWER) {
734     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
735     PetscSequentialPhaseBegin(mat->comm,1);
736     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
737            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
738            aij->cend);
739     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
740     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
741     fflush(fd);
742     PetscSequentialPhaseEnd(mat->comm,1);
743   }
744   else {
745     int size = aij->size;
746     rank = aij->rank;
747     if (size == 1) {
748       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
749     }
750     else {
751       /* assemble the entire matrix onto the first processor */
752       Mat         A;
753       Mat_SeqAIJ *Aloc;
754       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
755       Scalar      *a;
756 
757       if (!rank) {
758         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
759                CHKERRQ(ierr);
760       }
761       else {
762         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
763                CHKERRQ(ierr);
764       }
765       PLogObjectParent(mat,A);
766 
767       /* copy over the A part */
768       Aloc = (Mat_SeqAIJ*) aij->A->data;
769       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
770       row = aij->rstart;
771       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
772       for ( i=0; i<m; i++ ) {
773         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
774         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
775       }
776       aj = Aloc->j;
777       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
778 
779       /* copy over the B part */
780       Aloc = (Mat_SeqAIJ*) aij->B->data;
781       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
782       row = aij->rstart;
783       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
784       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
785       for ( i=0; i<m; i++ ) {
786         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
787         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
788       }
789       PetscFree(ct);
790       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
791       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
792       if (!rank) {
793         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
794       }
795       ierr = MatDestroy(A); CHKERRQ(ierr);
796     }
797   }
798   return 0;
799 }
800 
801 #undef __FUNC__
802 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
803 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
804 {
805   Mat         mat = (Mat) obj;
806   int         ierr;
807   ViewerType  vtype;
808 
809   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
810   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
811       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
812     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
813   }
814   else if (vtype == BINARY_FILE_VIEWER) {
815     return MatView_MPIAIJ_Binary(mat,viewer);
816   }
817   return 0;
818 }
819 
820 /*
821     This has to provide several versions.
822 
823      2) a) use only local smoothing, updating outer values only once.
824         b) local smoothing, updating outer values each inner iteration
825      3) color updating, with outer values updated between colors.
826 */
827 #undef __FUNC__
828 #define __FUNC__ "MatRelax_MPIAIJ"
829 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
830                            double fshift,int its,Vec xx)
831 {
832   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
833   Mat        AA = mat->A, BB = mat->B;
834   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
835   Scalar     *b,*x,*xs,*ls,d,*v,sum;
836   int        ierr,*idx, *diag;
837   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
838 
839   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
840   xs = x + shift; /* shift by one for index start of 1 */
841   ls = ls + shift;
842   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
843   diag = A->diag;
844   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
845     if (flag & SOR_ZERO_INITIAL_GUESS) {
846       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
847     }
848     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
849     CHKERRQ(ierr);
850     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
851     CHKERRQ(ierr);
852     while (its--) {
853       /* go down through the rows */
854       for ( i=0; i<m; i++ ) {
855         n    = A->i[i+1] - A->i[i];
856         idx  = A->j + A->i[i] + shift;
857         v    = A->a + A->i[i] + shift;
858         sum  = b[i];
859         SPARSEDENSEMDOT(sum,xs,v,idx,n);
860         d    = fshift + A->a[diag[i]+shift];
861         n    = B->i[i+1] - B->i[i];
862         idx  = B->j + B->i[i] + shift;
863         v    = B->a + B->i[i] + shift;
864         SPARSEDENSEMDOT(sum,ls,v,idx,n);
865         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
866       }
867       /* come up through the rows */
868       for ( i=m-1; i>-1; i-- ) {
869         n    = A->i[i+1] - A->i[i];
870         idx  = A->j + A->i[i] + shift;
871         v    = A->a + A->i[i] + shift;
872         sum  = b[i];
873         SPARSEDENSEMDOT(sum,xs,v,idx,n);
874         d    = fshift + A->a[diag[i]+shift];
875         n    = B->i[i+1] - B->i[i];
876         idx  = B->j + B->i[i] + shift;
877         v    = B->a + B->i[i] + shift;
878         SPARSEDENSEMDOT(sum,ls,v,idx,n);
879         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
880       }
881     }
882   }
883   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
884     if (flag & SOR_ZERO_INITIAL_GUESS) {
885       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
886     }
887     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
888     CHKERRQ(ierr);
889     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
890     CHKERRQ(ierr);
891     while (its--) {
892       for ( i=0; i<m; i++ ) {
893         n    = A->i[i+1] - A->i[i];
894         idx  = A->j + A->i[i] + shift;
895         v    = A->a + A->i[i] + shift;
896         sum  = b[i];
897         SPARSEDENSEMDOT(sum,xs,v,idx,n);
898         d    = fshift + A->a[diag[i]+shift];
899         n    = B->i[i+1] - B->i[i];
900         idx  = B->j + B->i[i] + shift;
901         v    = B->a + B->i[i] + shift;
902         SPARSEDENSEMDOT(sum,ls,v,idx,n);
903         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
904       }
905     }
906   }
907   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
908     if (flag & SOR_ZERO_INITIAL_GUESS) {
909       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
910     }
911     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
912                             mat->Mvctx); CHKERRQ(ierr);
913     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
914                             mat->Mvctx); CHKERRQ(ierr);
915     while (its--) {
916       for ( i=m-1; i>-1; i-- ) {
917         n    = A->i[i+1] - A->i[i];
918         idx  = A->j + A->i[i] + shift;
919         v    = A->a + A->i[i] + shift;
920         sum  = b[i];
921         SPARSEDENSEMDOT(sum,xs,v,idx,n);
922         d    = fshift + A->a[diag[i]+shift];
923         n    = B->i[i+1] - B->i[i];
924         idx  = B->j + B->i[i] + shift;
925         v    = B->a + B->i[i] + shift;
926         SPARSEDENSEMDOT(sum,ls,v,idx,n);
927         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
928       }
929     }
930   }
931   else {
932     SETERRQ(1,0,"Option not supported");
933   }
934   return 0;
935 }
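
/*
   Hedged usage sketch for the relaxation above: the MatSORType flags tested
   in the branches may be OR'ed together, e.g. one processor-local symmetric
   sweep starting from a zero initial guess:

     ierr = MatRelax(mat,b,1.0,
              (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),
              0.0,1,x); CHKERRQ(ierr);
*/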
936 
937 #undef __FUNC__
938 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
939 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
940 {
941   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
942   Mat        A = mat->A, B = mat->B;
943   int        ierr;
944   double     isend[5], irecv[5];
945 
946   info->rows_global    = (double)mat->M;
947   info->columns_global = (double)mat->N;
948   info->rows_local     = (double)mat->m;
949   info->columns_local  = (double)mat->N;
950   info->block_size     = 1.0;
951   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
952   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
953   isend[3] = info->memory;  isend[4] = info->mallocs;
954   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
955   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
956   isend[3] += info->memory;  isend[4] += info->mallocs;
957   if (flag == MAT_LOCAL) {
958     info->nz_used      = isend[0];
959     info->nz_allocated = isend[1];
960     info->nz_unneeded  = isend[2];
961     info->memory       = isend[3];
962     info->mallocs      = isend[4];
963   } else if (flag == MAT_GLOBAL_MAX) {
964     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
965     info->nz_used      = irecv[0];
966     info->nz_allocated = irecv[1];
967     info->nz_unneeded  = irecv[2];
968     info->memory       = irecv[3];
969     info->mallocs      = irecv[4];
970   } else if (flag == MAT_GLOBAL_SUM) {
971     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
972     info->nz_used      = irecv[0];
973     info->nz_allocated = irecv[1];
974     info->nz_unneeded  = irecv[2];
975     info->memory       = irecv[3];
976     info->mallocs      = irecv[4];
977   }
978   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
979   info->fill_ratio_needed = 0;
980   info->factor_mallocs    = 0;
981 
982   return 0;
983 }
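
/*
   Hedged usage sketch: the flag selects purely local numbers or a MAX/SUM
   reduction over the communicator, exactly as implemented above.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
     PetscPrintf(mat->comm,"nz used %d, nz allocated %d\n",
                 (int)info.nz_used,(int)info.nz_allocated);
*/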
984 
985 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
986 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
987 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
988 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
989 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
990 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
991 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
992 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
993 
994 #undef __FUNC__
995 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
996 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
997 {
998   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
999 
1000   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1001       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1002       op == MAT_COLUMNS_UNSORTED ||
1003       op == MAT_COLUMNS_SORTED ||
1004       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1005         MatSetOption(a->A,op);
1006         MatSetOption(a->B,op);
1007   } else if (op == MAT_ROW_ORIENTED) {
1008     a->roworiented = 1;
1009     MatSetOption(a->A,op);
1010     MatSetOption(a->B,op);
1011   } else if (op == MAT_ROWS_SORTED ||
1012              op == MAT_ROWS_UNSORTED ||
1013              op == MAT_SYMMETRIC ||
1014              op == MAT_STRUCTURALLY_SYMMETRIC ||
1015              op == MAT_YES_NEW_DIAGONALS)
1016     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1017   else if (op == MAT_COLUMN_ORIENTED) {
1018     a->roworiented = 0;
1019     MatSetOption(a->A,op);
1020     MatSetOption(a->B,op);
1021   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
1022     a->donotstash = 1;
1023   } else if (op == MAT_NO_NEW_DIAGONALS)
1024     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1025   else
1026     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1027   return 0;
1028 }
1029 
1030 #undef __FUNC__
1031 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1032 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1033 {
1034   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1035   *m = mat->M; *n = mat->N;
1036   return 0;
1037 }
1038 
1039 #undef __FUNC__
1040 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1041 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1042 {
1043   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1044   *m = mat->m; *n = mat->N;
1045   return 0;
1046 }
1047 
1048 #undef __FUNC__
1049 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1050 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1051 {
1052   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1053   *m = mat->rstart; *n = mat->rend;
1054   return 0;
1055 }
1056 
1057 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1058 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1059 
1060 #undef __FUNC__
1061 #define __FUNC__ "MatGetRow_MPIAIJ"
1062 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1063 {
1064   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1065   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1066   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1067   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1068   int        *cmap, *idx_p;
1069 
1070   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1071   mat->getrowactive = PETSC_TRUE;
1072 
1073   if (!mat->rowvalues && (idx || v)) {
1074     /*
1075         allocate enough space to hold information from the longest row.
1076     */
1077     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1078     int     max = 1,m = mat->m,tmp;
1079     for ( i=0; i<m; i++ ) {
1080       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1081       if (max < tmp) { max = tmp; }
1082     }
1083     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1084     CHKPTRQ(mat->rowvalues);
1085     mat->rowindices = (int *) (mat->rowvalues + max);
1086   }
1087 
1088   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1089   lrow = row - rstart;
1090 
1091   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1092   if (!v)   {pvA = 0; pvB = 0;}
1093   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1094   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1095   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1096   nztot = nzA + nzB;
1097 
1098   cmap  = mat->garray;
1099   if (v  || idx) {
1100     if (nztot) {
1101       /* Sort by increasing column numbers, assuming A and B already sorted */
1102       int imark = -1;
1103       if (v) {
1104         *v = v_p = mat->rowvalues;
1105         for ( i=0; i<nzB; i++ ) {
1106           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1107           else break;
1108         }
1109         imark = i;
1110         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1111         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1112       }
1113       if (idx) {
1114         *idx = idx_p = mat->rowindices;
1115         if (imark > -1) {
1116           for ( i=0; i<imark; i++ ) {
1117             idx_p[i] = cmap[cworkB[i]];
1118           }
1119         } else {
1120           for ( i=0; i<nzB; i++ ) {
1121             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1122             else break;
1123           }
1124           imark = i;
1125         }
1126         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1127         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1128       }
1129     }
1130     else {
1131       if (idx) *idx = 0;
1132       if (v)   *v   = 0;
1133     }
1134   }
1135   *nz = nztot;
1136   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1137   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1138   return 0;
1139 }
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1143 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1144 {
1145   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1146   if (aij->getrowactive == PETSC_FALSE) {
1147     SETERRQ(1,0,"MatGetRow not called");
1148   }
1149   aij->getrowactive = PETSC_FALSE;
1150   return 0;
1151 }
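
/*
   Hedged usage sketch for the pair above: only locally owned rows may be
   queried, and each MatGetRow() must be matched by a MatRestoreRow() before
   the next row is requested.

     int    rstart,rend,row,nz,*cols;
     Scalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       // examine nz, cols[], vals[]
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
     }
*/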
1152 
1153 #undef __FUNC__
1154 #define __FUNC__ "MatNorm_MPIAIJ"
1155 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1156 {
1157   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1158   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1159   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1160   double     sum = 0.0;
1161   Scalar     *v;
1162 
1163   if (aij->size == 1) {
1164     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1165   } else {
1166     if (type == NORM_FROBENIUS) {
1167       v = amat->a;
1168       for (i=0; i<amat->nz; i++ ) {
1169 #if defined(PETSC_COMPLEX)
1170         sum += real(conj(*v)*(*v)); v++;
1171 #else
1172         sum += (*v)*(*v); v++;
1173 #endif
1174       }
1175       v = bmat->a;
1176       for (i=0; i<bmat->nz; i++ ) {
1177 #if defined(PETSC_COMPLEX)
1178         sum += real(conj(*v)*(*v)); v++;
1179 #else
1180         sum += (*v)*(*v); v++;
1181 #endif
1182       }
1183       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1184       *norm = sqrt(*norm);
1185     }
1186     else if (type == NORM_1) { /* max column norm */
1187       double *tmp, *tmp2;
1188       int    *jj, *garray = aij->garray;
1189       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1190       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1191       PetscMemzero(tmp,aij->N*sizeof(double));
1192       *norm = 0.0;
1193       v = amat->a; jj = amat->j;
1194       for ( j=0; j<amat->nz; j++ ) {
1195         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1196       }
1197       v = bmat->a; jj = bmat->j;
1198       for ( j=0; j<bmat->nz; j++ ) {
1199         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1200       }
1201       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1202       for ( j=0; j<aij->N; j++ ) {
1203         if (tmp2[j] > *norm) *norm = tmp2[j];
1204       }
1205       PetscFree(tmp); PetscFree(tmp2);
1206     }
1207     else if (type == NORM_INFINITY) { /* max row norm */
1208       double ntemp = 0.0;
1209       for ( j=0; j<amat->m; j++ ) {
1210         v = amat->a + amat->i[j] + shift;
1211         sum = 0.0;
1212         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1213           sum += PetscAbsScalar(*v); v++;
1214         }
1215         v = bmat->a + bmat->i[j] + shift;
1216         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1217           sum += PetscAbsScalar(*v); v++;
1218         }
1219         if (sum > ntemp) ntemp = sum;
1220       }
1221       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1222     }
1223     else {
1224       SETERRQ(1,0,"No support for two norm");
1225     }
1226   }
1227   return 0;
1228 }
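
/*
   Hedged usage sketch: NORM_FROBENIUS, NORM_1 and NORM_INFINITY are handled
   above; the two norm is not supported in parallel.

     double nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);
*/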
1229 
1230 #undef __FUNC__
1231 #define __FUNC__ "MatTranspose_MPIAIJ"
1232 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1233 {
1234   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1235   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1236   int        ierr,shift = Aloc->indexshift;
1237   Mat        B;
1238   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1239   Scalar     *array;
1240 
1241   if (matout == PETSC_NULL && M != N)
1242     SETERRQ(1,0,"Square matrix only for in-place");
1243   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1244          PETSC_NULL,&B); CHKERRQ(ierr);
1245 
1246   /* copy over the A part */
1247   Aloc = (Mat_SeqAIJ*) a->A->data;
1248   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1249   row = a->rstart;
1250   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1251   for ( i=0; i<m; i++ ) {
1252     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1253     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1254   }
1255   aj = Aloc->j;
1256   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1257 
1258   /* copy over the B part */
1259   Aloc = (Mat_SeqAIJ*) a->B->data;
1260   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1261   row = a->rstart;
1262   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1263   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1264   for ( i=0; i<m; i++ ) {
1265     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1266     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1267   }
1268   PetscFree(ct);
1269   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1270   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1271   if (matout != PETSC_NULL) {
1272     *matout = B;
1273   } else {
1274     /* This isn't really an in-place transpose, but we free the data structures of a */
1275     PetscFree(a->rowners);
1276     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1277     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1278     if (a->colmap) PetscFree(a->colmap);
1279     if (a->garray) PetscFree(a->garray);
1280     if (a->lvec) VecDestroy(a->lvec);
1281     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1282     PetscFree(a);
1283     PetscMemcpy(A,B,sizeof(struct _Mat));
1284     PetscHeaderDestroy(B);
1285   }
1286   return 0;
1287 }
1288 
1289 #undef __FUNC__
1290 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1291 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1292 {
1293   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1294   Mat a = aij->A, b = aij->B;
1295   int ierr,s1,s2,s3;
1296 
1297   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1298   if (rr) {
1299     s3 = aij->n;
1300     VecGetLocalSize_Fast(rr,s1);
1301     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1302     /* Overlap communication with computation. */
1303     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1304   }
1305   if (ll) {
1306     VecGetLocalSize_Fast(ll,s1);
1307     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1308     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1309   }
1310   /* scale  the diagonal block */
1311   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1312 
1313   if (rr) {
1314     /* Do a scatter end and then right scale the off-diagonal block */
1315     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1316     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1317   }
1318 
1319   return 0;
1320 }
1321 
1322 
1323 extern int MatPrintHelp_SeqAIJ(Mat);
1324 #undef __FUNC__
1325 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1326 static int MatPrintHelp_MPIAIJ(Mat A)
1327 {
1328   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1329 
1330   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1331   else return 0;
1332 }
1333 
1334 #undef __FUNC__
1335 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1336 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1337 {
1338   *bs = 1;
1339   return 0;
1340 }
1341 #undef __FUNC__
1342 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1343 static int MatSetUnfactored_MPIAIJ(Mat A)
1344 {
1345   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1346   int        ierr;
1347   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1348   return 0;
1349 }
1350 
1351 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1352 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1353 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1354 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1355 /* -------------------------------------------------------------------*/
1356 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1357        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1358        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1359        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1360        0,0,
1361        0,0,
1362        0,0,
1363        MatRelax_MPIAIJ,
1364        MatTranspose_MPIAIJ,
1365        MatGetInfo_MPIAIJ,0,
1366        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1367        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1368        0,
1369        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1370        0,0,0,0,
1371        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1372        0,0,
1373        0,0,MatConvertSameType_MPIAIJ,0,0,
1374        0,0,0,
1375        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1376        MatPrintHelp_MPIAIJ,
1377        MatScale_MPIAIJ,0,0,0,
1378        MatGetBlockSize_MPIAIJ,0,0,0,0,
1379        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1380 
1381 
1382 #undef __FUNC__
1383 #define __FUNC__ "MatCreateMPIAIJ"
1384 /*@C
1385    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1386    (the default parallel PETSc format).  For good matrix assembly performance
1387    the user should preallocate the matrix storage by setting the parameters
1388    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1389    performance can be increased by more than a factor of 50.
1390 
1391    Input Parameters:
1392 .  comm - MPI communicator
1393 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1394            This value should be the same as the local size used in creating the
1395            y vector for the matrix-vector product y = Ax.
1396 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1397            This value should be the same as the local size used in creating the
1398            x vector for the matrix-vector product y = Ax.
1399 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1400 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1401 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1402            (same for all local rows)
1403 .  d_nnz - array containing the number of nonzeros in the various rows of the
1404            diagonal portion of the local submatrix (possibly different for each row)
1405            or PETSC_NULL. You must leave room for the diagonal entry even if
1406            it is zero.
1407 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1408            submatrix (same for all local rows).
1409 .  o_nnz - array containing the number of nonzeros in the various rows of the
1410            off-diagonal portion of the local submatrix (possibly different for
1411            each row) or PETSC_NULL.
1412 
1413    Output Parameter:
1414 .  A - the matrix
1415 
1416    Notes:
1417    The AIJ format (also called the Yale sparse matrix format or
1418    compressed row storage) is fully compatible with standard Fortran 77
1419    storage.  That is, the stored row and column indices can begin at
1420    either one (as in Fortran) or zero.  See the users manual for details.
1421 
1422    The user MUST specify either the local or global matrix dimensions
1423    (possibly both).
1424 
1425    By default, this format uses inodes (identical nodes) when possible.
1426    We search for consecutive rows with the same nonzero structure, thereby
1427    reusing matrix information to achieve increased efficiency.
1428 
1429    Options Database Keys:
1430 $    -mat_aij_no_inode  - Do not use inodes
1431 $    -mat_aij_inode_limit <limit> - Set inode limit.
1432 $        (max limit=5)
1433 $    -mat_aij_oneindex - Internally use indexing starting at 1
1434 $        rather than 0.  Note: When calling MatSetValues(),
1435 $        the user still MUST index entries starting at 0!
1436 
1437    Storage Information:
1438    For a square global matrix we define each processor's diagonal portion
1439    to be its local rows and the corresponding columns (a square submatrix);
1440    each processor's off-diagonal portion encompasses the remainder of the
1441    local matrix (a rectangular submatrix).
1442 
1443    The user can specify preallocated storage for the diagonal part of
1444    the local submatrix with either d_nz or d_nnz (not both).  Set
1445    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1446    memory allocation.  Likewise, specify preallocated storage for the
1447    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1448 
1449    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1450    the figure below we depict these three local rows and all columns (0-11).
1451 
1452 $          0 1 2 3 4 5 6 7 8 9 10 11
1453 $         -------------------
1454 $  row 3  |  o o o d d d o o o o o o
1455 $  row 4  |  o o o d d d o o o o o o
1456 $  row 5  |  o o o d d d o o o o o o
1457 $         -------------------
1458 $
1459 
1460    Thus, any entries in the d locations are stored in the d (diagonal)
1461    submatrix, and any entries in the o locations are stored in the
1462    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1463    stored simply in the MATSEQAIJ format for compressed row storage.
1464 
1465    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1466    and o_nz should indicate the number of nonzeros per row in the o matrix.
1467    In general, for PDE problems in which most nonzeros are near the diagonal,
1468    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1469    or you will get TERRIBLE performance; see the users' manual chapter on
1470    matrices.
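
   Example:
   A minimal sketch (the sizes here are hypothetical: four processes, each
   owning three rows of a 12x12 matrix, with at most 3 nonzeros per row in
   the diagonal block and at most 9 in the off-diagonal block, as in the
   figure above):

$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,3,3,PETSC_DECIDE,PETSC_DECIDE,
$                             3,PETSC_NULL,9,PETSC_NULL,&A); CHKERRQ(ierr);
$      /* ... set entries with MatSetValues(), then assemble ... */
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);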

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
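  /* uniprocessor case: factorization and triangular solves can be done
     on the single sequential block, so make those operations available;
     in parallel they remain unsupported (see the SETERRQ paths above) */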
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
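  /* split the rows (columns) as evenly as possible: every process gets
     M/size rows and the first M%size processes get one extra row each;
     e.g. M=10 on size=3 gives local sizes 4,3,3 */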
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];
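  /* after the prefix sums above, rowners[i] is the global index of the
     first row owned by process i (and rowners[size] = M), so this process
     owns rows rstart, ..., rend-1; cowners is the analogous column table */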

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);
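  /* b->A is the m x n diagonal block; b->B holds the off-diagonal entries
     and is created with the full global column dimension N, since any
     off-process column may appear in it */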

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
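/*
   Makes a complete copy of an MPIAIJ matrix.  The parallel layout is
   copied field by field, and the MatConvert(...,MATSAME,...) calls below
   always copy the numerical values as well, so the cpvalues argument is
   not consulted here.
*/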
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
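/*
   Loads a parallel AIJ matrix from a PETSc binary file.  The format read
   below is: a header of four ints (MAT_COOKIE, M, N, and a fourth entry,
   presumably the total number of nonzeros, that is not used here),
   followed by the M row lengths, then all column indices, then all
   numerical values.  Only process 0 reads the file; every other process
   receives its portion in MPI messages.  A typical call chain (a sketch,
   assuming the binary viewer is created with ViewerFileOpenBinary()):

      ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);
*/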
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }
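  /* ourlens[i] currently counts ALL entries in local row i; temporarily
     subtract the off-diagonal counts so that ourlens/offlens give the
     exact per-row preallocation for the diagonal/off-diagonal blocks,
     then restore ourlens to full row lengths for the MatSetValues loops */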

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}