xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3369ce9a255b285539dc954b9ad9cefcea2f84b6)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.194 1997/03/13 16:33:58 curfman Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global number of columns.
14 */
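/* Example (illustrative values, not from the source): if garray = {3,8,12} then
   colmap[3] = 1, colmap[8] = 2, colmap[12] = 3, and colmap is 0 for every other
   global column, so a zero entry means the column is not present in the B part. */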
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
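/*
   MatSetValues_SeqAIJ_A_Private - inlined insertion of a single (row,col,value)
   into the local SeqAIJ matrix A: the value is added (ADD_VALUES) or overwritten
   (INSERT_VALUES) if the column is already present, otherwise it is inserted in
   sorted column order; if the row has no free slot and new nonzeros are allowed,
   the i/j/a arrays are reallocated with CHUNKSIZE extra entries for this row.
*/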
59 #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = imax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66       for ( _i=0; _i<nrow; _i++ ) { \
67         if (rp[_i] > col1) break; \
68         if (rp[_i] == col1) { \
69           if (addv == ADD_VALUES) ap[_i] += value;   \
70           else                  ap[_i] = value; \
71           goto _noinsert; \
72         } \
73       }  \
74       if (nonew)  goto _noinsert; \
75       if (nrow >= rmax) { \
76         /* there is no extra room in row, therefore enlarge */ \
77         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
78         Scalar *new_a; \
79  \
80         /* malloc new storage space */ \
81         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
82         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
83         new_j   = (int *) (new_a + new_nz); \
84         new_i   = new_j + new_nz; \
85  \
86         /* copy over old data into new slots */ \
87         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
88         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
89         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
90         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
91         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
92                                                            len*sizeof(int)); \
93         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
94         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
95                                                            len*sizeof(Scalar));  \
96         /* free up old matrix storage */ \
97  \
98         PetscFree(a->a);  \
99         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
100         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
101         a->singlemalloc = 1; \
102  \
103         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
104         rmax = imax[row] = imax[row] + CHUNKSIZE; \
105         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
106         a->maxnz += CHUNKSIZE; \
107         a->reallocs++; \
108       } \
109       N = nrow++ - 1; a->nz++; \
110       /* shift up all the later entries in this row */ \
111       for ( ii=N; ii>=_i; ii-- ) { \
112         rp[ii+1] = rp[ii]; \
113         ap[ii+1] = ap[ii]; \
114       } \
115       rp[_i] = col1;  \
116       ap[_i] = value;  \
117       _noinsert: ; \
118       ailen[row] = nrow; \
119 }
120 
121 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
122 #undef __FUNC__
123 #define __FUNC__ "MatSetValues_MPIAIJ"
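/*
   Locally owned rows (rstart <= im[i] < rend) are stored directly: columns in
   [cstart,cend) go into the diagonal block aij->A, all other columns into the
   off-diagonal block aij->B (translated through colmap once the matrix has been
   assembled).  Rows owned by other processors are stashed and communicated
   during assembly, unless donotstash is set.
*/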
124 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
125 {
126   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
127   Scalar     value;
128   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
129   int        cstart = aij->cstart, cend = aij->cend,row,col;
130   int        roworiented = aij->roworiented;
131 
132   /* Some variables required by the MatSetValues_SeqAIJ_A_Private macro */
133   Mat        A = aij->A;
134   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
135   int        *rp,ii,nrow,_i,rmax, N, col1;
136   int        *imax = a->imax, *ai = a->i, *ailen = a->ilen;
137   int        *aj = a->j, nonew = a->nonew,shift = a->indexshift;
138   Scalar     *ap, *aa = a->a;
139 
140   for ( i=0; i<m; i++ ) {
141 #if defined(PETSC_BOPT_g)
142     if (im[i] < 0) SETERRQ(1,0,"Negative row");
143     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
144 #endif
145     if (im[i] >= rstart && im[i] < rend) {
146       row = im[i] - rstart;
147       for ( j=0; j<n; j++ ) {
148         if (in[j] >= cstart && in[j] < cend){
149           col = in[j] - cstart;
150           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
151           MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
152           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
153         }
154 #if defined(PETSC_BOPT_g)
155         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
156         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
157 #endif
158         else {
159           if (mat->was_assembled) {
160             if (!aij->colmap) {
161               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
162             }
163             col = aij->colmap[in[j]] - 1;
164             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
165               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
166               col =  in[j];
167             }
168           }
169           else col = in[j];
170           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
171           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
172         }
173       }
174     }
175     else {
176       if (roworiented && !aij->donotstash) {
177         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
178       }
179       else {
180         if (!aij->donotstash) {
181           row = im[i];
182           for ( j=0; j<n; j++ ) {
183             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
184           }
185         }
186       }
187     }
188   }
189   return 0;
190 }
191 
192 #undef __FUNC__
193 #define __FUNC__ "MatGetValues_MPIAIJ"
194 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
195 {
196   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
197   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
198   int        cstart = aij->cstart, cend = aij->cend,row,col;
199 
200   for ( i=0; i<m; i++ ) {
201     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
202     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
203     if (idxm[i] >= rstart && idxm[i] < rend) {
204       row = idxm[i] - rstart;
205       for ( j=0; j<n; j++ ) {
206         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
207         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
208         if (idxn[j] >= cstart && idxn[j] < cend){
209           col = idxn[j] - cstart;
210           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
211         }
212         else {
213           if (!aij->colmap) {
214             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
215           }
216           col = aij->colmap[idxn[j]] - 1;
217           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
218           else {
219             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
220           }
221         }
222       }
223     }
224     else {
225       SETERRQ(1,0,"Only local values currently supported");
226     }
227   }
228   return 0;
229 }
230 
231 #undef __FUNC__
232 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
233 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
234 {
235   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
236   MPI_Comm    comm = mat->comm;
237   int         size = aij->size, *owners = aij->rowners;
238   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
239   MPI_Request *send_waits,*recv_waits;
240   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
241   InsertMode  addv;
242   Scalar      *rvalues,*svalues;
243 
244   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
245   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
246   if (addv == (ADD_VALUES|INSERT_VALUES)) {
247     SETERRQ(1,0,"Some processors inserted others added");
248   }
249   mat->insertmode = addv; /* in case this processor had no cache */
250 
251   /*  first count number of contributors to each processor */
252   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
253   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
254   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
255   for ( i=0; i<aij->stash.n; i++ ) {
256     idx = aij->stash.idx[i];
257     for ( j=0; j<size; j++ ) {
258       if (idx >= owners[j] && idx < owners[j+1]) {
259         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
260       }
261     }
262   }
263   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
264 
265   /* inform other processors of number of messages and max length*/
266   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
267   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
268   nreceives = work[rank];
269   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
270   nmax = work[rank];
271   PetscFree(work);
272 
273   /* post receives:
274        1) each message consists of ordered triples
275      (global row index, global column index, value); the indices are
276      stored as Scalars (doubles) to simplify the message passing.
277        2) since we don't know how long each individual message is, we
278      allocate the largest needed buffer for each receive.  Potentially
279      this is a lot of wasted space.
280 
281 
282        This could be done better.
283   */
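  /* Illustrative layout of one message: entry k destined for a given processor
     occupies slots 3*k, 3*k+1, 3*k+2 of the buffer and holds the global row
     index, the global column index, and the value, with the indices cast to
     Scalar. */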
284   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
285   CHKPTRQ(rvalues);
286   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
287   CHKPTRQ(recv_waits);
288   for ( i=0; i<nreceives; i++ ) {
289     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
290               comm,recv_waits+i);
291   }
292 
293   /* do sends:
294       1) starts[i] gives the starting index in svalues for stuff going to
295          the ith processor
296   */
297   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
298   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
299   CHKPTRQ(send_waits);
300   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
301   starts[0] = 0;
302   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
303   for ( i=0; i<aij->stash.n; i++ ) {
304     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
305     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
306     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
307   }
308   PetscFree(owner);
309   starts[0] = 0;
310   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
311   count = 0;
312   for ( i=0; i<size; i++ ) {
313     if (procs[i]) {
314       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
315                 comm,send_waits+count++);
316     }
317   }
318   PetscFree(starts); PetscFree(nprocs);
319 
320   /* Free cache space */
321   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
322   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
323 
324   aij->svalues    = svalues;    aij->rvalues    = rvalues;
325   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
326   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
327   aij->rmax       = nmax;
328 
329   return 0;
330 }
331 extern int MatSetUpMultiply_MPIAIJ(Mat);
332 
333 #undef __FUNC__
334 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
335 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
336 {
337   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
338   MPI_Status  *send_status,recv_status;
339   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
340   int         row,col,other_disassembled;
341   Scalar      *values,val;
342   InsertMode  addv = mat->insertmode;
343 
344   /*  wait on receives */
345   while (count) {
346     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
347     /* unpack receives into our local space */
348     values = aij->rvalues + 3*imdex*aij->rmax;
349     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
350     n = n/3;
351     for ( i=0; i<n; i++ ) {
352       row = (int) PetscReal(values[3*i]) - aij->rstart;
353       col = (int) PetscReal(values[3*i+1]);
354       val = values[3*i+2];
355       if (col >= aij->cstart && col < aij->cend) {
356         col -= aij->cstart;
357         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
358       }
359       else {
360         if (mat->was_assembled || mat->assembled) {
361           if (!aij->colmap) {
362             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
363           }
364           col = aij->colmap[col] - 1;
365           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
366             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
367             col = (int) PetscReal(values[3*i+1]);
368           }
369         }
370         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
371       }
372     }
373     count--;
374   }
375   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
376 
377   /* wait on sends */
378   if (aij->nsends) {
379     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
380     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
381     PetscFree(send_status);
382   }
383   PetscFree(aij->send_waits); PetscFree(aij->svalues);
384 
385   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
386   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
387 
388   /* determine if any processor has disassembled; if so, we must
389      also disassemble ourselves so that we may reassemble. */
390   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
391   if (mat->was_assembled && !other_disassembled) {
392     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
393   }
394 
395   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
396     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
397   }
398   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
402   return 0;
403 }
404 
405 #undef __FUNC__
406 #define __FUNC__ "MatZeroEntries_MPIAIJ"
407 int MatZeroEntries_MPIAIJ(Mat A)
408 {
409   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
410   int        ierr;
411   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
412   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
413   return 0;
414 }
415 
416 /* the code does not do the diagonal entries correctly unless the
417    matrix is square and the column and row ownerships are identical.
418    This is a BUG. The only way to fix it seems to be to access
419    aij->A and aij->B directly and not through the MatZeroRows()
420    routine.
421 */
422 #undef __FUNC__
423 #define __FUNC__ "MatZeroRows_MPIAIJ"
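/*
   Each processor sends the global row indices it wants zeroed to the owning
   processors; after the exchange every processor zeros its own rows in both
   local blocks, applying the optional diagonal value only to the diagonal
   block l->A.
*/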
424 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
425 {
426   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
427   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
428   int            *procs,*nprocs,j,found,idx,nsends,*work;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436 
437   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
438   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
439 
440   /*  first count number of contributors to each processor */
441   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
442   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
443   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
444   for ( i=0; i<N; i++ ) {
445     idx = rows[i];
446     found = 0;
447     for ( j=0; j<size; j++ ) {
448       if (idx >= owners[j] && idx < owners[j+1]) {
449         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
450       }
451     }
452     if (!found) SETERRQ(1,0,"Index out of range");
453   }
454   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
455 
456   /* inform other processors of number of messages and max length*/
457   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
458   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
459   nrecvs = work[rank];
460   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
461   nmax = work[rank];
462   PetscFree(work);
463 
464   /* post receives:   */
465   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
466   CHKPTRQ(rvalues);
467   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
468   CHKPTRQ(recv_waits);
469   for ( i=0; i<nrecvs; i++ ) {
470     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
471   }
472 
473   /* do sends:
474       1) starts[i] gives the starting index in svalues for stuff going to
475          the ith processor
476   */
477   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
478   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
479   CHKPTRQ(send_waits);
480   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
481   starts[0] = 0;
482   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
483   for ( i=0; i<N; i++ ) {
484     svalues[starts[owner[i]]++] = rows[i];
485   }
486   ISRestoreIndices(is,&rows);
487 
488   starts[0] = 0;
489   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
490   count = 0;
491   for ( i=0; i<size; i++ ) {
492     if (procs[i]) {
493       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
494     }
495   }
496   PetscFree(starts);
497 
498   base = owners[rank];
499 
500   /*  wait on receives */
501   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
502   source = lens + nrecvs;
503   count  = nrecvs; slen = 0;
504   while (count) {
505     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
506     /* unpack receives into our local space */
507     MPI_Get_count(&recv_status,MPI_INT,&n);
508     source[imdex]  = recv_status.MPI_SOURCE;
509     lens[imdex]  = n;
510     slen += n;
511     count--;
512   }
513   PetscFree(recv_waits);
514 
515   /* move the data into the send scatter */
516   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
517   count = 0;
518   for ( i=0; i<nrecvs; i++ ) {
519     values = rvalues + i*nmax;
520     for ( j=0; j<lens[i]; j++ ) {
521       lrows[count++] = values[j] - base;
522     }
523   }
524   PetscFree(rvalues); PetscFree(lens);
525   PetscFree(owner); PetscFree(nprocs);
526 
527   /* actually zap the local rows */
528   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
529   PLogObjectParent(A,istmp);
530   PetscFree(lrows);
531   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
532   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
533   ierr = ISDestroy(istmp); CHKERRQ(ierr);
534 
535   /* wait on sends */
536   if (nsends) {
537     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
538     CHKPTRQ(send_status);
539     MPI_Waitall(nsends,send_waits,send_status);
540     PetscFree(send_status);
541   }
542   PetscFree(send_waits); PetscFree(svalues);
543 
544   return 0;
545 }
546 
547 #undef __FUNC__
548 #define __FUNC__ "MatMult_MPIAIJ"
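/*
   y = A*x is computed as y = A_d*x + A_o*x_ghost: the scatter that gathers the
   ghost values needed by the off-diagonal block B is overlapped with the
   multiply by the diagonal block A.
*/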
549 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
550 {
551   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
552   int        ierr,nt;
553 
554   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
555   if (nt != a->n) {
556     SETERRQ(1,0,"Incompatible partition of A and xx");
557   }
558   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
559   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
560   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
561   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
562   return 0;
563 }
564 
565 #undef __FUNC__
566 #define __FUNC__ "MatMultAdd_MPIAIJ"
567 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
568 {
569   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
570   int        ierr;
571   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
573   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
574   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
575   return 0;
576 }
577 
578 #undef __FUNC__
579 #define __FUNC__ "MatMultTrans_MPIAIJ"
580 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
583   int        ierr;
584 
585   /* do nondiagonal part */
586   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
587   /* send it on its way */
588   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
589   /* do local part */
590   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
591   /* receive remote parts: note this assumes the values are not actually */
592   /* inserted in yy until the next line, which is true for this implementation */
593   /* but may not always be true. */
594   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
595   return 0;
596 }
597 
598 #undef __FUNC__
599 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
600 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
603   int        ierr;
604 
605   /* do nondiagonal part */
606   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
607   /* send it on its way */
608   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
609   /* do local part */
610   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
611   /* receive remote parts: note this assumes the values are not actually */
612   /* inserted in zz until the next line, which is true for this implementation */
613   /* but may not always be true. */
614   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
615   return 0;
616 }
617 
618 /*
619   This only works correctly for square matrices where the subblock A->A is the
620    diagonal block
621 */
622 #undef __FUNC__
623 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
624 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
625 {
626   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
627   if (a->M != a->N)
628     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
629   if (a->rstart != a->cstart || a->rend != a->cend) {
630     SETERRQ(1,0,"row partition must equal col partition");  }
631   return MatGetDiagonal(a->A,v);
632 }
633 
634 #undef __FUNC__
635 #define __FUNC__ "MatScale_MPIAIJ"
636 int MatScale_MPIAIJ(Scalar *aa,Mat A)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
639   int        ierr;
640   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
641   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
642   return 0;
643 }
644 
645 #undef __FUNC__
646 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
647 int MatDestroy_MPIAIJ(PetscObject obj)
648 {
649   Mat        mat = (Mat) obj;
650   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
651   int        ierr;
652 #if defined(PETSC_LOG)
653   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
654 #endif
655   PetscFree(aij->rowners);
656   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
657   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
658   if (aij->colmap) PetscFree(aij->colmap);
659   if (aij->garray) PetscFree(aij->garray);
660   if (aij->lvec)   VecDestroy(aij->lvec);
661   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
662   if (aij->rowvalues) PetscFree(aij->rowvalues);
663   PetscFree(aij);
664   if (mat->mapping) {
665     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
666   }
667   PLogObjectDestroy(mat);
668   PetscHeaderDestroy(mat);
669   return 0;
670 }
671 
672 #undef __FUNC__
673 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
674 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
675 {
676   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
677   int         ierr;
678 
679   if (aij->size == 1) {
680     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
681   }
682   else SETERRQ(1,0,"Only uniprocessor output supported");
683   return 0;
684 }
685 
686 #undef __FUNC__
687 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
688 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
689 {
690   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
691   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
692   int         ierr, format,shift = C->indexshift,rank;
693   FILE        *fd;
694   ViewerType  vtype;
695 
696   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
697   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
698     ierr = ViewerGetFormat(viewer,&format);
699     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
700       MatInfo info;
701       int     flg;
702       MPI_Comm_rank(mat->comm,&rank);
703       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
704       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
705       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
706       PetscSequentialPhaseBegin(mat->comm,1);
707       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
708          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
709       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
710          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
711       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
712       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
713       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
714       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
715       fflush(fd);
716       PetscSequentialPhaseEnd(mat->comm,1);
717       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
718       return 0;
719     }
720     else if (format == VIEWER_FORMAT_ASCII_INFO) {
721       return 0;
722     }
723   }
724 
725   if (vtype == DRAW_VIEWER) {
726     Draw       draw;
727     PetscTruth isnull;
728     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
729     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
730   }
731 
732   if (vtype == ASCII_FILE_VIEWER) {
733     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
734     PetscSequentialPhaseBegin(mat->comm,1);
735     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
736            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
737            aij->cend);
738     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
739     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
740     fflush(fd);
741     PetscSequentialPhaseEnd(mat->comm,1);
742   }
743   else {
744     int size = aij->size;
745     rank = aij->rank;
746     if (size == 1) {
747       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
748     }
749     else {
750       /* assemble the entire matrix onto first processor. */
751       Mat         A;
752       Mat_SeqAIJ *Aloc;
753       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
754       Scalar      *a;
755 
756       if (!rank) {
757         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
758                CHKERRQ(ierr);
759       }
760       else {
761         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
762                CHKERRQ(ierr);
763       }
764       PLogObjectParent(mat,A);
765 
766       /* copy over the A part */
767       Aloc = (Mat_SeqAIJ*) aij->A->data;
768       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
769       row = aij->rstart;
770       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
771       for ( i=0; i<m; i++ ) {
772         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
773         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
774       }
775       aj = Aloc->j;
776       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
777 
778       /* copy over the B part */
779       Aloc = (Mat_SeqAIJ*) aij->B->data;
780       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
781       row = aij->rstart;
782       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
783       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
784       for ( i=0; i<m; i++ ) {
785         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
786         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
787       }
788       PetscFree(ct);
789       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
790       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
791       if (!rank) {
792         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
793       }
794       ierr = MatDestroy(A); CHKERRQ(ierr);
795     }
796   }
797   return 0;
798 }
799 
800 #undef __FUNC__
801 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
802 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
803 {
804   Mat         mat = (Mat) obj;
805   int         ierr;
806   ViewerType  vtype;
807 
808   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
809   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
810       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
811     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
812   }
813   else if (vtype == BINARY_FILE_VIEWER) {
814     return MatView_MPIAIJ_Binary(mat,viewer);
815   }
816   return 0;
817 }
818 
819 /*
820     This has to provide several versions.
821 
822      2) a) use only local smoothing, updating outer values only once.
823         b) local smoothing, updating outer values each inner iteration.
824      3) coloring, updating outer values between colors.
825 */
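/*
   The local sweeps below perform SOR updates of the (assumed) form
       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j != i} a_ij*x_j)/(a_ii + fshift),
   where the off-processor part of the sum uses the ghost values gathered into
   lvec; this reading assumes SPARSEDENSEMDOT subtracts the sparse dot product
   from its first argument.
*/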
826 #undef __FUNC__
827 #define __FUNC__ "MatRelax_MPIAIJ"
828 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
829                            double fshift,int its,Vec xx)
830 {
831   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
832   Mat        AA = mat->A, BB = mat->B;
833   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
834   Scalar     *b,*x,*xs,*ls,d,*v,sum;
835   int        ierr,*idx, *diag;
836   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
837 
838   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
839   xs = x + shift; /* shift by one for index start of 1 */
840   ls = ls + shift;
841   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
842   diag = A->diag;
843   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
844     if (flag & SOR_ZERO_INITIAL_GUESS) {
845       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
846     }
847     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
848     CHKERRQ(ierr);
849     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
850     CHKERRQ(ierr);
851     while (its--) {
852       /* go down through the rows */
853       for ( i=0; i<m; i++ ) {
854         n    = A->i[i+1] - A->i[i];
855         idx  = A->j + A->i[i] + shift;
856         v    = A->a + A->i[i] + shift;
857         sum  = b[i];
858         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859         d    = fshift + A->a[diag[i]+shift];
860         n    = B->i[i+1] - B->i[i];
861         idx  = B->j + B->i[i] + shift;
862         v    = B->a + B->i[i] + shift;
863         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
865       }
866       /* come up through the rows */
867       for ( i=m-1; i>-1; i-- ) {
868         n    = A->i[i+1] - A->i[i];
869         idx  = A->j + A->i[i] + shift;
870         v    = A->a + A->i[i] + shift;
871         sum  = b[i];
872         SPARSEDENSEMDOT(sum,xs,v,idx,n);
873         d    = fshift + A->a[diag[i]+shift];
874         n    = B->i[i+1] - B->i[i];
875         idx  = B->j + B->i[i] + shift;
876         v    = B->a + B->i[i] + shift;
877         SPARSEDENSEMDOT(sum,ls,v,idx,n);
878         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
879       }
880     }
881   }
882   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
883     if (flag & SOR_ZERO_INITIAL_GUESS) {
884       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
885     }
886     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
887     CHKERRQ(ierr);
888     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
889     CHKERRQ(ierr);
890     while (its--) {
891       for ( i=0; i<m; i++ ) {
892         n    = A->i[i+1] - A->i[i];
893         idx  = A->j + A->i[i] + shift;
894         v    = A->a + A->i[i] + shift;
895         sum  = b[i];
896         SPARSEDENSEMDOT(sum,xs,v,idx,n);
897         d    = fshift + A->a[diag[i]+shift];
898         n    = B->i[i+1] - B->i[i];
899         idx  = B->j + B->i[i] + shift;
900         v    = B->a + B->i[i] + shift;
901         SPARSEDENSEMDOT(sum,ls,v,idx,n);
902         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
903       }
904     }
905   }
906   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
907     if (flag & SOR_ZERO_INITIAL_GUESS) {
908       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
909     }
910     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
911                             mat->Mvctx); CHKERRQ(ierr);
912     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
913                             mat->Mvctx); CHKERRQ(ierr);
914     while (its--) {
915       for ( i=m-1; i>-1; i-- ) {
916         n    = A->i[i+1] - A->i[i];
917         idx  = A->j + A->i[i] + shift;
918         v    = A->a + A->i[i] + shift;
919         sum  = b[i];
920         SPARSEDENSEMDOT(sum,xs,v,idx,n);
921         d    = fshift + A->a[diag[i]+shift];
922         n    = B->i[i+1] - B->i[i];
923         idx  = B->j + B->i[i] + shift;
924         v    = B->a + B->i[i] + shift;
925         SPARSEDENSEMDOT(sum,ls,v,idx,n);
926         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
927       }
928     }
929   }
930   else {
931     SETERRQ(1,0,"Option not supported");
932   }
933   return 0;
934 }
935 
936 #undef __FUNC__
937 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
938 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
939 {
940   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
941   Mat        A = mat->A, B = mat->B;
942   int        ierr;
943   double     isend[5], irecv[5];
944 
945   info->rows_global    = (double)mat->M;
946   info->columns_global = (double)mat->N;
947   info->rows_local     = (double)mat->m;
948   info->columns_local  = (double)mat->N;
949   info->block_size     = 1.0;
950   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
951   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
952   isend[3] = info->memory;  isend[4] = info->mallocs;
953   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
954   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
955   isend[3] += info->memory;  isend[4] += info->mallocs;
956   if (flag == MAT_LOCAL) {
957     info->nz_used      = isend[0];
958     info->nz_allocated = isend[1];
959     info->nz_unneeded  = isend[2];
960     info->memory       = isend[3];
961     info->mallocs      = isend[4];
962   } else if (flag == MAT_GLOBAL_MAX) {
963     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
964     info->nz_used      = irecv[0];
965     info->nz_allocated = irecv[1];
966     info->nz_unneeded  = irecv[2];
967     info->memory       = irecv[3];
968     info->mallocs      = irecv[4];
969   } else if (flag == MAT_GLOBAL_SUM) {
970     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
971     info->nz_used      = irecv[0];
972     info->nz_allocated = irecv[1];
973     info->nz_unneeded  = irecv[2];
974     info->memory       = irecv[3];
975     info->mallocs      = irecv[4];
976   }
977   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
978   info->fill_ratio_needed = 0;
979   info->factor_mallocs    = 0;
980 
981   return 0;
982 }
983 
984 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
985 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
986 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
987 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
988 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
989 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
990 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
991 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
992 
993 #undef __FUNC__
994 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
995 int MatSetOption_MPIAIJ(Mat A,MatOption op)
996 {
997   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
998 
999   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1000       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1001       op == MAT_COLUMNS_UNSORTED ||
1002       op == MAT_COLUMNS_SORTED ||
1003       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1004         MatSetOption(a->A,op);
1005         MatSetOption(a->B,op);
1006   } else if (op == MAT_ROW_ORIENTED) {
1007     a->roworiented = 1;
1008     MatSetOption(a->A,op);
1009     MatSetOption(a->B,op);
1010   } else if (op == MAT_ROWS_SORTED ||
1011              op == MAT_ROWS_UNSORTED ||
1012              op == MAT_SYMMETRIC ||
1013              op == MAT_STRUCTURALLY_SYMMETRIC ||
1014              op == MAT_YES_NEW_DIAGONALS)
1015     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1016   else if (op == MAT_COLUMN_ORIENTED) {
1017     a->roworiented = 0;
1018     MatSetOption(a->A,op);
1019     MatSetOption(a->B,op);
1020   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
1021     a->donotstash = 1;
1022   } else if (op == MAT_NO_NEW_DIAGONALS)
1023     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1024   else
1025     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1026   return 0;
1027 }
1028 
1029 #undef __FUNC__
1030 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1031 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1032 {
1033   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1034   *m = mat->M; *n = mat->N;
1035   return 0;
1036 }
1037 
1038 #undef __FUNC__
1039 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1040 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1041 {
1042   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1043   *m = mat->m; *n = mat->N;
1044   return 0;
1045 }
1046 
1047 #undef __FUNC__
1048 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1049 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1050 {
1051   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1052   *m = mat->rstart; *n = mat->rend;
1053   return 0;
1054 }
1055 
1056 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1057 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1058 
1059 #undef __FUNC__
1060 #define __FUNC__ "MatGetRow_MPIAIJ"
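/*
   A row of the global matrix is returned by concatenating the corresponding
   rows of the diagonal block A and the off-diagonal block B, with B's local
   column indices translated to global indices through garray and the result
   merged in increasing global column order.
*/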
1061 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1062 {
1063   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1064   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1065   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1066   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1067   int        *cmap, *idx_p;
1068 
1069   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1070   mat->getrowactive = PETSC_TRUE;
1071 
1072   if (!mat->rowvalues && (idx || v)) {
1073     /*
1074         allocate enough space to hold information from the longest row.
1075     */
1076     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1077     int     max = 1,m = mat->m,tmp;
1078     for ( i=0; i<m; i++ ) {
1079       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1080       if (max < tmp) { max = tmp; }
1081     }
1082     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1083     CHKPTRQ(mat->rowvalues);
1084     mat->rowindices = (int *) (mat->rowvalues + max);
1085   }
1086 
1087   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1088   lrow = row - rstart;
1089 
1090   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1091   if (!v)   {pvA = 0; pvB = 0;}
1092   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1093   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1094   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1095   nztot = nzA + nzB;
1096 
1097   cmap  = mat->garray;
1098   if (v  || idx) {
1099     if (nztot) {
1100       /* Sort by increasing column numbers, assuming A and B already sorted */
1101       int imark = -1;
1102       if (v) {
1103         *v = v_p = mat->rowvalues;
1104         for ( i=0; i<nzB; i++ ) {
1105           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1106           else break;
1107         }
1108         imark = i;
1109         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1110         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1111       }
1112       if (idx) {
1113         *idx = idx_p = mat->rowindices;
1114         if (imark > -1) {
1115           for ( i=0; i<imark; i++ ) {
1116             idx_p[i] = cmap[cworkB[i]];
1117           }
1118         } else {
1119           for ( i=0; i<nzB; i++ ) {
1120             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1121             else break;
1122           }
1123           imark = i;
1124         }
1125         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1126         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1127       }
1128     }
1129     else {
1130       if (idx) *idx = 0;
1131       if (v)   *v   = 0;
1132     }
1133   }
1134   *nz = nztot;
1135   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1136   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1137   return 0;
1138 }
1139 
1140 #undef __FUNC__
1141 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1142 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1143 {
1144   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1145   if (aij->getrowactive == PETSC_FALSE) {
1146     SETERRQ(1,0,"MatGetRow not called");
1147   }
1148   aij->getrowactive = PETSC_FALSE;
1149   return 0;
1150 }
1151 
1152 #undef __FUNC__
1153 #define __FUNC__ "MatNorm_MPIAIJ"
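/*
   Norms are assembled from the two local blocks: the Frobenius norm sums the
   squares of all local entries and reduces with MPI_SUM, the 1-norm reduces the
   per-column absolute sums with MPI_SUM and then takes the maximum, and the
   infinity norm reduces the local maximum row sums with MPI_MAX.
*/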
1154 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1155 {
1156   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1157   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1158   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1159   double     sum = 0.0;
1160   Scalar     *v;
1161 
1162   if (aij->size == 1) {
1163     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1164   } else {
1165     if (type == NORM_FROBENIUS) {
1166       v = amat->a;
1167       for (i=0; i<amat->nz; i++ ) {
1168 #if defined(PETSC_COMPLEX)
1169         sum += real(conj(*v)*(*v)); v++;
1170 #else
1171         sum += (*v)*(*v); v++;
1172 #endif
1173       }
1174       v = bmat->a;
1175       for (i=0; i<bmat->nz; i++ ) {
1176 #if defined(PETSC_COMPLEX)
1177         sum += real(conj(*v)*(*v)); v++;
1178 #else
1179         sum += (*v)*(*v); v++;
1180 #endif
1181       }
1182       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1183       *norm = sqrt(*norm);
1184     }
1185     else if (type == NORM_1) { /* max column norm */
1186       double *tmp, *tmp2;
1187       int    *jj, *garray = aij->garray;
1188       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1189       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1190       PetscMemzero(tmp,aij->N*sizeof(double));
1191       *norm = 0.0;
1192       v = amat->a; jj = amat->j;
1193       for ( j=0; j<amat->nz; j++ ) {
1194         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1195       }
1196       v = bmat->a; jj = bmat->j;
1197       for ( j=0; j<bmat->nz; j++ ) {
1198         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1199       }
1200       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1201       for ( j=0; j<aij->N; j++ ) {
1202         if (tmp2[j] > *norm) *norm = tmp2[j];
1203       }
1204       PetscFree(tmp); PetscFree(tmp2);
1205     }
1206     else if (type == NORM_INFINITY) { /* max row norm */
1207       double ntemp = 0.0;
1208       for ( j=0; j<amat->m; j++ ) {
1209         v = amat->a + amat->i[j] + shift;
1210         sum = 0.0;
1211         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1212           sum += PetscAbsScalar(*v); v++;
1213         }
1214         v = bmat->a + bmat->i[j] + shift;
1215         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1216           sum += PetscAbsScalar(*v); v++;
1217         }
1218         if (sum > ntemp) ntemp = sum;
1219       }
1220       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1221     }
1222     else {
1223       SETERRQ(1,0,"No support for two norm");
1224     }
1225   }
1226   return 0;
1227 }
1228 
1229 #undef __FUNC__
1230 #define __FUNC__ "MatTranspose_MPIAIJ"
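/*
   The transpose is built by inserting each local row (from both the diagonal
   and off-diagonal blocks, with B's columns mapped to global indices through
   garray) as a column of a newly created MPIAIJ matrix, which is then either
   returned or copied over the original matrix.
*/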
1231 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1232 {
1233   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1234   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1235   int        ierr,shift = Aloc->indexshift;
1236   Mat        B;
1237   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1238   Scalar     *array;
1239 
1240   if (matout == PETSC_NULL && M != N)
1241     SETERRQ(1,0,"Square matrix only for in-place");
1242   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1243          PETSC_NULL,&B); CHKERRQ(ierr);
1244 
1245   /* copy over the A part */
1246   Aloc = (Mat_SeqAIJ*) a->A->data;
1247   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1248   row = a->rstart;
1249   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1250   for ( i=0; i<m; i++ ) {
1251     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1252     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1253   }
1254   aj = Aloc->j;
1255   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1256 
1257   /* copy over the B part */
1258   Aloc = (Mat_SeqAIJ*) a->B->data;
1259   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1260   row = a->rstart;
1261   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1262   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1263   for ( i=0; i<m; i++ ) {
1264     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1265     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1266   }
1267   PetscFree(ct);
1268   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1269   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1270   if (matout != PETSC_NULL) {
1271     *matout = B;
1272   } else {
1273     /* This isn't really an in-place transpose, but we free the data structures from a */
1274     PetscFree(a->rowners);
1275     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1276     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1277     if (a->colmap) PetscFree(a->colmap);
1278     if (a->garray) PetscFree(a->garray);
1279     if (a->lvec) VecDestroy(a->lvec);
1280     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1281     PetscFree(a);
1282     PetscMemcpy(A,B,sizeof(struct _Mat));
1283     PetscHeaderDestroy(B);
1284   }
1285   return 0;
1286 }
1287 
1288 #undef __FUNC__
1289 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1290 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1291 {
1292   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1293   Mat a = aij->A, b = aij->B;
1294   int ierr,s1,s2,s3;
1295 
1296   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1297   if (rr) {
1298     s3 = aij->n;
1299     VecGetLocalSize_Fast(rr,s1);
1300     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1301     /* Overlap communication with computation. */
1302     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1303   }
1304   if (ll) {
1305     VecGetLocalSize_Fast(ll,s1);
1306     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1307     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1308   }
1309   /* scale  the diagonal block */
1310   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1311 
1312   if (rr) {
1313     /* Do a scatter end and then right scale the off-diagonal block */
1314     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1315     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1316   }
1317 
1318   return 0;
1319 }
1320 
1321 
1322 extern int MatPrintHelp_SeqAIJ(Mat);
1323 #undef __FUNC__
1324 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1325 int MatPrintHelp_MPIAIJ(Mat A)
1326 {
1327   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1328 
1329   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1330   else return 0;
1331 }
1332 
1333 #undef __FUNC__
1334 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1335 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1336 {
1337   *bs = 1;
1338   return 0;
1339 }
1340 #undef __FUNC__
1341 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1342 int MatSetUnfactored_MPIAIJ(Mat A)
1343 {
1344   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1345   int        ierr;
1346   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1347   return 0;
1348 }
1349 
1350 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1351 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1352 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1353 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1354 /* -------------------------------------------------------------------*/
1355 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1356        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1357        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1358        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1359        0,0,
1360        0,0,
1361        0,0,
1362        MatRelax_MPIAIJ,
1363        MatTranspose_MPIAIJ,
1364        MatGetInfo_MPIAIJ,0,
1365        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1366        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1367        0,
1368        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1369        0,0,0,0,
1370        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1371        0,0,
1372        0,0,MatConvertSameType_MPIAIJ,0,0,
1373        0,0,0,
1374        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1375        MatPrintHelp_MPIAIJ,
1376        MatScale_MPIAIJ,0,0,0,
1377        MatGetBlockSize_MPIAIJ,0,0,0,0,
1378        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1379 
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatCreateMPIAIJ"
1383 /*@C
1384    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1385    (the default parallel PETSc format).  For good matrix assembly performance
1386    the user should preallocate the matrix storage by setting the parameters
1387    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1388    performance can be increased by more than a factor of 50.
1389 
1390    Input Parameters:
1391 .  comm - MPI communicator
1392 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1393            This value should be the same as the local size used in creating the
1394            y vector for the matrix-vector product y = Ax.
1395 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1396            This value should be the same as the local size used in creating the
1397            x vector for the matrix-vector product y = Ax.
1398 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1399 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1400 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1401            (same for all local rows)
1402 .  d_nnz - array containing the number of nonzeros in the various rows of the
1403            diagonal portion of the local submatrix (possibly different for each row)
1404            or PETSC_NULL. You must leave room for the diagonal entry even if
1405            it is zero.
1406 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1407            submatrix (same for all local rows).
1408 .  o_nnz - array containing the number of nonzeros in the various rows of the
1409            off-diagonal portion of the local submatrix (possibly different for
1410            each row) or PETSC_NULL.
1411 
1412    Output Parameter:
1413 .  A - the matrix
1414 
1415    Notes:
1416    The AIJ format (also called the Yale sparse matrix format or
1417    compressed row storage), is fully compatible with standard Fortran 77
1418    storage.  That is, the stored row and column indices can begin at
1419    either one (as in Fortran) or zero.  See the users manual for details.
1420 
1421    The user MUST specify either the local or global matrix dimensions
1422    (possibly both).
1423 
1424    By default, this format uses inodes (identical nodes) when possible.
1425    We search for consecutive rows with the same nonzero structure, thereby
1426    reusing matrix information to achieve increased efficiency.
1427 
1428    Options Database Keys:
1429 $    -mat_aij_no_inode  - Do not use inodes
1430 $    -mat_aij_inode_limit <limit> - Set inode limit.
1431 $        (max limit=5)
1432 $    -mat_aij_oneindex - Internally use indexing starting at 1
1433 $        rather than 0.  Note: When calling MatSetValues(),
1434 $        the user still MUST index entries starting at 0!
1435 
1436    Storage Information:
1437    For a square global matrix we define each processor's diagonal portion
1438    to be its local rows and the corresponding columns (a square submatrix);
1439    each processor's off-diagonal portion encompasses the remainder of the
1440    local matrix (a rectangular submatrix).
1441 
1442    The user can specify preallocated storage for the diagonal part of
1443    the local submatrix with either d_nz or d_nnz (not both).  Set
1444    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1445    memory allocation.  Likewise, specify preallocated storage for the
1446    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1447 
1448    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1449    the figure below we depict these three local rows and all columns (0-11).
1450 
1451 $            0 1 2 3 4 5 6 7 8 9 10 11
1452 $         ---------------------------
1453 $  row 3  |  o o o d d d o o o o o o
1454 $  row 4  |  o o o d d d o o o o o o
1455 $  row 5  |  o o o d d d o o o o o o
1456 $         ---------------------------
1457 $
1458 
1459    Thus, any entries in the d locations are stored in the d (diagonal)
1460    submatrix, and any entries in the o locations are stored in the
1461    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1462    stored simply in the MATSEQAIJ format for compressed row storage.
1463 
1464    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1465    and o_nz should indicate the number of nonzeros per row in the o matrix.
1466    In general, for PDE problems in which most nonzeros are near the diagonal,
1467    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1468    or you will get TERRIBLE performance; see the users' manual chapter on
1469    matrices.
1470 
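   Example usage (a minimal sketch for illustration only; the preallocation
   values 5 and 2 are arbitrary, and M, N, A, and ierr are assumed to be
   declared by the caller):
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
   followed by the usual MatSetValues(), MatAssemblyBegin(), and
   MatAssemblyEnd() sequence.
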
1471 .keywords: matrix, aij, compressed row, sparse, parallel
1472 
1473 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1474 @*/
1475 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1476                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1477 {
1478   Mat          B;
1479   Mat_MPIAIJ   *b;
1480   int          ierr, i,sum[2],work[2],size;
1481 
1482   *A = 0;
1483   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1484   PLogObjectCreate(B);
1485   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1486   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1487   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1488   MPI_Comm_size(comm,&size);
1489   if (size == 1) {
1490     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1491     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1492     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1493     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1494     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1495     B->ops.solve             = MatSolve_MPIAIJ;
1496     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1497     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1498     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1499     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1500   }
1501   B->destroy    = MatDestroy_MPIAIJ;
1502   B->view       = MatView_MPIAIJ;
1503   B->factor     = 0;
1504   B->assembled  = PETSC_FALSE;
1505   B->mapping    = 0;
1506 
1507   B->insertmode = NOT_SET_VALUES;
1508   MPI_Comm_rank(comm,&b->rank);
1509   MPI_Comm_size(comm,&b->size);
1510 
1511   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1512     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1513 
1514   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1515     work[0] = m; work[1] = n;
1516     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1517     if (M == PETSC_DECIDE) M = sum[0];
1518     if (N == PETSC_DECIDE) N = sum[1];
1519   }
1520   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1521   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1522   b->m = m; B->m = m;
1523   b->n = n; B->n = n;
1524   b->N = N; B->N = N;
1525   b->M = M; B->M = M;
1526 
1527   /* build local table of row and column ownerships */
1528   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1529   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1530   b->cowners = b->rowners + b->size + 2;
1531   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
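  /* turn the gathered per-processor row counts into a running sum, so that
     processor p owns global rows rowners[p] through rowners[p+1]-1 */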
1532   b->rowners[0] = 0;
1533   for ( i=2; i<=b->size; i++ ) {
1534     b->rowners[i] += b->rowners[i-1];
1535   }
1536   b->rstart = b->rowners[b->rank];
1537   b->rend   = b->rowners[b->rank+1];
1538   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1539   b->cowners[0] = 0;
1540   for ( i=2; i<=b->size; i++ ) {
1541     b->cowners[i] += b->cowners[i-1];
1542   }
1543   b->cstart = b->cowners[b->rank];
1544   b->cend   = b->cowners[b->rank+1];
1545 
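  /* b->A holds the diagonal block (local rows by local columns); b->B holds the
     off-diagonal entries and is created with the full global column width N,
     its column space being compressed once the matrix is assembled */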
1546   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1547   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1548   PLogObjectParent(B,b->A);
1549   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1550   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1551   PLogObjectParent(B,b->B);
1552 
1553   /* build cache (stash) for entries destined for rows owned by other processors */
1554   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1555   b->donotstash  = 0;
1556   b->colmap      = 0;
1557   b->garray      = 0;
1558   b->roworiented = 1;
1559 
1560   /* stuff used for matrix vector multiply */
1561   b->lvec      = 0;
1562   b->Mvctx     = 0;
1563 
1564   /* stuff for MatGetRow() */
1565   b->rowindices   = 0;
1566   b->rowvalues    = 0;
1567   b->getrowactive = PETSC_FALSE;
1568 
1569   *A = B;
1570   return 0;
1571 }
1572 
1573 #undef __FUNC__
1574 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1575 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1576 {
1577   Mat        mat;
1578   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1579   int        ierr, len=0, flg;
1580 
1581   *newmat       = 0;
1582   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1583   PLogObjectCreate(mat);
1584   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1585   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1586   mat->destroy    = MatDestroy_MPIAIJ;
1587   mat->view       = MatView_MPIAIJ;
1588   mat->factor     = matin->factor;
1589   mat->assembled  = PETSC_TRUE;
1590 
1591   a->m = mat->m   = oldmat->m;
1592   a->n = mat->n   = oldmat->n;
1593   a->M = mat->M   = oldmat->M;
1594   a->N = mat->N   = oldmat->N;
1595 
1596   a->rstart       = oldmat->rstart;
1597   a->rend         = oldmat->rend;
1598   a->cstart       = oldmat->cstart;
1599   a->cend         = oldmat->cend;
1600   a->size         = oldmat->size;
1601   a->rank         = oldmat->rank;
1602   mat->insertmode = NOT_SET_VALUES;
1603   a->rowvalues    = 0;
1604   a->getrowactive = PETSC_FALSE;
1605 
1606   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1607   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1608   a->cowners = a->rowners + a->size + 2;
1609   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1610   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1611   if (oldmat->colmap) {
1612     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1613     PLogObjectMemory(mat,(a->N)*sizeof(int));
1614     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1615   } else a->colmap = 0;
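  /* garray[] maps local column numbers of the off-diagonal block B back to
     global column numbers (the inverse of the colmap lookup above) */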
1616   if (oldmat->garray) {
1617     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1618     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1619     PLogObjectMemory(mat,len*sizeof(int));
1620     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1621   } else a->garray = 0;
1622 
1623   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1624   PLogObjectParent(mat,a->lvec);
1625   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1626   PLogObjectParent(mat,a->Mvctx);
1627   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1628   PLogObjectParent(mat,a->A);
1629   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1630   PLogObjectParent(mat,a->B);
1631   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1632   if (flg) {
1633     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1634   }
1635   *newmat = mat;
1636   return 0;
1637 }
1638 
1639 #include "sys.h"
1640 
1641 #undef __FUNC__
1642 #define __FUNC__ "MatLoad_MPIAIJ"
1643 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1644 {
1645   Mat          A;
1646   int          i, nz, ierr, j,rstart, rend, fd;
1647   Scalar       *vals,*svals;
1648   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1649   MPI_Status   status;
1650   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1651   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1652   int          tag = ((PetscObject)viewer)->tag;
1653 
1654   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1655   if (!rank) {
1656     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1657     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1658     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object");
1659   }
1660 
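  /* only the first processor reads the file; broadcast the header (minus the
     cookie) so every processor learns the global dimensions */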
1661   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1662   M = header[1]; N = header[2];
1663   /* determine ownership of all rows */
1664   m = M/size + ((M % size) > rank);
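  /* each processor gets M/size rows; the first M % size processors take one extra */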
1665   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1666   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1667   rowners[0] = 0;
1668   for ( i=2; i<=size; i++ ) {
1669     rowners[i] += rowners[i-1];
1670   }
1671   rstart = rowners[rank];
1672   rend   = rowners[rank+1];
1673 
1674   /* distribute row lengths to all processors */
1675   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1676   offlens = ourlens + (rend-rstart);
1677   if (!rank) {
1678     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1679     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1680     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1681     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1682     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1683     PetscFree(sndcounts);
1684   }
1685   else {
1686     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1687   }
1688 
1689   if (!rank) {
1690     /* calculate the number of nonzeros on each processor */
1691     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1692     PetscMemzero(procsnz,size*sizeof(int));
1693     for ( i=0; i<size; i++ ) {
1694       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1695         procsnz[i] += rowlengths[j];
1696       }
1697     }
1698     PetscFree(rowlengths);
1699 
1700     /* determine max buffer needed and allocate it */
1701     maxnz = 0;
1702     for ( i=0; i<size; i++ ) {
1703       maxnz = PetscMax(maxnz,procsnz[i]);
1704     }
1705     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1706 
1707     /* read in my part of the matrix column indices  */
1708     nz = procsnz[0];
1709     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1710     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1711 
1712     /* read in everyone else's column indices and ship them off */
1713     for ( i=1; i<size; i++ ) {
1714       nz = procsnz[i];
1715       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1716       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1717     }
1718     PetscFree(cols);
1719   }
1720   else {
1721     /* determine buffer space needed for message */
1722     nz = 0;
1723     for ( i=0; i<m; i++ ) {
1724       nz += ourlens[i];
1725     }
1726     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1727 
1728     /* receive message of column indices */
1729     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1730     MPI_Get_count(&status,MPI_INT,&maxnz);
1731     if (maxnz != nz) SETERRQ(1,0,"file is inconsistent: unexpected number of column indices received");
1732   }
1733 
1734   /* loop over local rows, determining number of off-diagonal entries */
1735   PetscMemzero(offlens,m*sizeof(int));
1736   jj = 0;
1737   for ( i=0; i<m; i++ ) {
1738     for ( j=0; j<ourlens[i]; j++ ) {
1739       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1740       jj++;
1741     }
1742   }
1743 
1744   /* create our matrix */
1745   for ( i=0; i<m; i++ ) {
1746     ourlens[i] -= offlens[i];
1747   }
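  /* at this point ourlens[] holds the diagonal-block row lengths and offlens[]
     the off-diagonal ones, which is what MatCreateMPIAIJ() expects for
     preallocation */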
1748   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1749   A = *newmat;
1750   MatSetOption(A,MAT_COLUMNS_SORTED);
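  /* restore ourlens[] to the full row lengths; they are used below to step
     through mycols[] and vals[] row by row */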
1751   for ( i=0; i<m; i++ ) {
1752     ourlens[i] += offlens[i];
1753   }
1754 
1755   if (!rank) {
1756     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1757 
1758     /* read in my part of the matrix numerical values  */
1759     nz = procsnz[0];
1760     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1761 
1762     /* insert into matrix */
1763     jj      = rstart;
1764     smycols = mycols;
1765     svals   = vals;
1766     for ( i=0; i<m; i++ ) {
1767       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1768       smycols += ourlens[i];
1769       svals   += ourlens[i];
1770       jj++;
1771     }
1772 
1773     /* read in the other processors' values and ship them out */
1774     for ( i=1; i<size; i++ ) {
1775       nz = procsnz[i];
1776       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1777       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1778     }
1779     PetscFree(procsnz);
1780   }
1781   else {
1782     /* receive numeric values */
1783     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1784 
1785     /* receive message of values */
1786     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1787     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1788     if (maxnz != nz) SETERRQ(1,0,"file is inconsistent: unexpected number of values received");
1789 
1790     /* insert into matrix */
1791     jj      = rstart;
1792     smycols = mycols;
1793     svals   = vals;
1794     for ( i=0; i<m; i++ ) {
1795       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1796       smycols += ourlens[i];
1797       svals   += ourlens[i];
1798       jj++;
1799     }
1800   }
1801   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1802 
1803   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1804   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1805   return 0;
1806 }
1807